RubyGems - indeedparser - Versions diffs - 0.0.1 - Mend

indeedparser 0.0.1

Files changed (13) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 2516696c6e9a799fbc03e3c1189b9b6273649b2c
+  data.tar.gz: 22034fac47e8b9eaab688a1427264cc727b09dbf
+SHA512:
+  metadata.gz: 815a153b6f9af990cba3ecc5061b8576a2b459b9a08b86fbf0ddd3f67e11c42824c449453846c43b2a64e0d43bc0e79be0f396761fd081ac96464b4b792f8a49
+  data.tar.gz: 71307ef7ad393cae5a201ea628a44b39f3763a40709336b1ade3cd08ca32a4f01a9cea277658b38476262da445cdfd7a9d31a625b71ec69187186327c6a6ff07

data/lib/awards.rb ADDED Viewed

@@ -0,0 +1,38 @@
+require 'nokogiri'
+load 'utilities.rb'
+class Awards
+  include Utilities
+  def initialize(html)
+    awards = html.xpath("//div[contains(concat(' ',normalize-space(@class),' '),' award-section ')]")
+    @award_list = Array.new
+    awards.each do |award|
+      @award_list.push({
+                        award_title: award_title(award),
+                        award_date: award_date(award),
+                        award_description: award_description(award)
+      })
+    end
+  end
+  # Return award info
+  def get_awards
+    return @award_list
+  end
+  # Get title of award
+  def award_title(award)
+    award.xpath(".//p[@class='award_title']").text
+  end
+  # Get award date
+  def award_date(award)
+    award.xpath(".//p[@class='award_date']").text
+  end
+  # Get award description
+  def award_description(award)
+    award.xpath(".//p[@class='award_description']").text
+  end
+end

data/lib/certifications.rb ADDED Viewed

@@ -0,0 +1,44 @@
+require 'nokogiri'
+load 'utilities.rb'
+class Certifications
+  include Utilities
+  def initialize(html)
+    certifications = html.xpath("//div[contains(concat(' ',normalize-space(@class),' '),' certification-section ')]")
+    @certification_list = Array.new
+    certifications.each do |certification|
+      @certification_list.push({
+                                 cert_title: cert_title(certification),
+                                 cert_description: cert_description(certification),
+                                 cert_start_date: cert_start_date(certification),
+                                 cert_end_date: cert_end_date(certification)
+      })
+    end
+  end
+  # Return cert info
+  def get_certifications
+    return @certification_list
+  end
+  # Get title of cert
+  def cert_title(certification)
+    certification.xpath(".//p[@class='certification_title']").text
+  end
+  # Get description of cert
+  def cert_description(certification)
+    certification.xpath(".//p[@class='certification_description']").text
+  end
+  # Get start date for cert validity
+  def cert_start_date(certification)
+    parse_dates(certification.xpath(".//p[@class='certification_date']").text)[0]
+  end
+  # Get cert end date
+  def cert_end_date(certification)
+    parse_dates(certification.xpath(".//p[@class='certification_date']").text)[1]
+  end
+end

data/lib/degrees.rb ADDED Viewed

@@ -0,0 +1,50 @@
+require 'nokogiri'
+load 'utilities.rb'
+class Degrees
+  include Utilities
+  def initialize(html)
+    degrees = html.xpath("//div[@itemtype='http://schema.org/EducationalOrganization']")
+    @degree_list = Array.new
+    degrees.each do |degree|
+      @degree_list.push({
+                          school: school(degree),
+                          degree_title: degree_title(degree),
+                          school_location: school_location(degree),
+                          degree_start_date: degree_start_date(degree),
+                          degree_end_date: degree_end_date(degree)
+      })
+    end
+  end
+  # Return degree info
+  def get_degrees
+    return @degree_list
+  end
+  # Get school name
+  def school(degree)
+    degree.xpath(".//span[@itemprop='name']").text
+  end
+  # Get title of degree
+  def degree_title(degree)
+    degree.xpath(".//p[@class='edu_title']").text
+  end
+  # Get where the school is
+  def school_location(degree)
+    degree.xpath(".//span[@itemprop='addressLocality']").text
+  end
+  # Get start date for degree
+  def degree_start_date(degree)
+    parse_dates(degree.xpath(".//p[@class='edu_dates']").text)[0]
+  end
+  # Get degree end date
+  def degree_end_date(degree)
+    parse_dates(degree.xpath(".//p[@class='edu_dates']").text)[1]
+  end
+end

data/lib/groups.rb ADDED Viewed

@@ -0,0 +1,44 @@
+require 'nokogiri'
+load 'utilities.rb'
+class Groups
+  include Utilities
+  def initialize(html)
+    groups = html.xpath("//div[contains(concat(' ',normalize-space(@class),' '),' group-section ')]")
+    @group_list = Array.new
+    groups.each do |group|
+      @group_list.push({
+                                 group_title: group_title(group),
+                                 group_description: group_description(group),
+                                 group_start_date: group_start_date(group),
+                                 group_end_date: group_end_date(group)
+      })
+    end
+  end
+  # Return group info
+  def get_groups
+    return @group_list
+  end
+  # Get title of group
+  def group_title(group)
+    group.xpath(".//p[@class='group_title']").text
+  end
+  # Get description of group
+  def group_description(group)
+    group.xpath(".//p[@class='group_description']").text
+  end
+  # Get start date for group
+  def group_start_date(group)
+    parse_dates(group.xpath(".//p[@class='group_date']").text)[0]
+  end
+  # Get group end date
+  def group_end_date(group)
+    parse_dates(group.xpath(".//p[@class='group_date']").text)[1]
+  end
+end

data/lib/indeed_parser.rb ADDED Viewed

@@ -0,0 +1,33 @@
+require 'requestmanager'
+require 'json'
+load 'personal_info.rb'
+load 'jobs.rb'
+class IndeedParser
+  def initialize(html, url, crawler_fields)
+    @html = html
+    @url = url
+    @crawler_fields = crawler_fields
+    parse
+  end
+  # Parse profile
+  def parse
+    p = PersonalInfo.new(@html, @url)
+    @personal_info = p.get_personal_info
+    j = Jobs.new(@html)
+    @job_info = j.get_jobs
+  end
+  # Get output
+  def get_results_by_job
+    output = Array.new
+    @job_info.each do |job|
+      output.push(job.merge!(@personal_info).merge!(@crawler_fields))
+    end
+    JSON.pretty_generate(output)
+  end
+end

data/lib/jobs.rb ADDED Viewed

@@ -0,0 +1,58 @@
+require 'nokogiri'
+load 'utilities.rb'
+class Jobs
+  include Utilities
+  def initialize(html)
+    @html = Nokogiri::HTML(html)
+    jobs = @html.xpath("//div[contains(concat(' ',normalize-space(@class),' '),' work-experience-section ')]")
+    @job_info = Array.new
+    jobs.each do |job|
+      @job_info.push({
+        job_title: job_title(job),
+        company: company(job),
+        company_location: company_location(job),
+        job_description: job_description(job),
+        start_date: start_date(job),
+        end_date: end_date(job)
+      })
+    end
+  end
+  # Return job info
+  def get_jobs
+    return @job_info
+  end
+  # Get job title
+  def job_title(job)
+    job.xpath(".//p[@class='work_title title']").text
+  end
+  # Get company
+  def company(job)
+    job.xpath(".//div[@class='work_company']//span").first.text
+  end
+  # Get work location
+  def company_location(job)
+    job.xpath(".//div[@class='work_company']//div[@class='inline-block']//span").text
+  end
+  # Get job description
+  def job_description(job)
+    job.xpath(".//p[@class='work_description']").text
+  end
+  # Get start date
+  def start_date(job)
+    parse_dates(job.xpath(".//p[@class='work_dates']").text)[0]
+  end
+  # Get end date
+  def end_date(job)
+    parse_dates(job.xpath(".//p[@class='work_dates']").text)[1]
+  end
+end

data/lib/links.rb ADDED Viewed

@@ -0,0 +1,32 @@
+require 'nokogiri'
+load 'utilities.rb'
+class Links
+  include Utilities
+  def initialize(html)
+    links = html.xpath("//div[contains(concat(' ',normalize-space(@class),' '),' link-section ')]")
+    @link_list = Array.new
+    links.each do |link|
+      @link_list.push({
+                        link_title: link_title(link),
+                        link_url: link_url(link)
+      })
+    end
+  end
+  # Return person info
+  def get_links
+    return @link_list
+  end
+  # Get title of link
+  def link_title(link)
+    link.xpath(".//a").text
+  end
+  # Get link url
+  def link_url(link)
+    link.xpath(".//a").first['href']
+  end
+end

data/lib/military_service.rb ADDED Viewed

@@ -0,0 +1,65 @@
+require 'nokogiri'
+load 'utilities.rb'
+class MilitaryService
+  include Utilities
+  def initialize(html)
+    military_items = html.xpath("//div[contains(concat(' ',normalize-space(@class),' '),' military-section ')]")
+    @military_service = Array.new
+    military_items.each do |mil_item|
+      @military_service.push({
+                               military_country: military_country(mil_item),
+                               military_branch: military_branch(mil_item),
+                               military_rank: military_rank(mil_item),
+                               military_description: military_description(mil_item),
+                               military_commendations: military_commendations(mil_item),
+                               military_start_date: military_start_date(mil_item),
+                               military_end_date: military_end_date(mil_item)
+      })
+    end
+  end
+  # Return military service info
+  def get_military_service
+    return @military_service
+  end
+  # Get country of military service
+  def military_country(mil_item)
+    remove = mil_item.xpath(".//p[@class='military_country']//span").text
+    mil_item.xpath(".//p[@class='military_country']").text.gsub(remove, "").strip.lstrip
+  end
+  # Get military branch
+  def military_branch(mil_item)
+    remove = mil_item.xpath(".//p[@class='military_branch']//span").text
+    mil_item.xpath(".//p[@class='military_branch']").text.gsub(remove, "").strip.lstrip
+  end
+  # Get military rank
+  def military_rank(mil_item)
+    remove = mil_item.xpath(".//p[@class='military_rank']//span").text
+    mil_item.xpath(".//p[@class='military_rank']").text.gsub(remove, "").strip.lstrip
+  end
+  # Get military description
+  def military_description(mil_item)
+    mil_item.xpath(".//p[@class='military_description']").text
+  end
+  # Get military commendations
+  def military_commendations(mil_item)
+    mil_item.xpath(".//p[@class='military_commendations']").text
+  end
+  # Get start date
+  def military_start_date(mil_item)
+    parse_dates(mil_item.xpath(".//p[@class='military_date']").text)[0]
+  end
+  # Get end date
+  def military_end_date(mil_item)
+    parse_dates(mil_item.xpath(".//p[@class='military_date']").text)[1]
+  end
+end

data/lib/personal_info.rb ADDED Viewed

@@ -0,0 +1,117 @@
+require 'nokogiri'
+load 'degrees.rb'
+load 'military_service.rb'
+load 'certifications.rb'
+load 'rec_people.rb'
+load 'links.rb'
+load 'awards.rb'
+load 'groups.rb'
+class PersonalInfo
+  def initialize(html, url)
+    @raw_html = html
+    @html = Nokogiri::HTML(html)
+    @url = url
+    @personal_info = {
+      name: name,
+      url: @url,
+      location: location,
+      current_title: current_title,
+      skills: skills,
+      summary: summary,
+      additional_info: additional_info,
+      last_updated: last_updated,
+      degrees: degrees,
+      military_service: military_service,
+      certifications: certifications,
+      rec_people: rec_people,
+      links: links,
+      awards: awards,
+      groups: groups,
+      fulltext: @raw_html
+    }
+  end
+  # Return personal info hash
+  def get_personal_info
+    return @personal_info
+  end
+  # Get certification data
+  def certifications
+    c = Certifications.new(@html)
+    c.get_certifications
+  end
+  # Get list of suggested resumes from side
+  def rec_people
+    r = RecPeople.new(@html)
+    r.get_rec_people
+  end
+  # Get any links they list
+  def links
+    l = Links.new(@html)
+    l.get_links
+  end
+  # Get list of awards
+  def awards
+    a = Awards.new(@html)
+    a.get_awards
+  end
+  # Get list of groups
+  def groups
+    g = Groups.new(@html)
+    g.get_groups
+  end
+  # Get list of degrees
+  def degrees
+    d = Degrees.new(@html)
+    d.get_degrees
+  end
+  # Get military service
+  def military_service
+    m = MilitaryService.new(@html)
+    m.get_military_service
+  end
+  # Get persons name
+  def name
+    @html.xpath("//h1[@itemprop='name']").text
+  end
+  # Get location
+  def location
+    @html.xpath("//p[@id='headline_location']").text
+  end
+  # Get overall job title
+  def current_title
+    @html.xpath("//h2[@id='headline']").text
+  end
+  # Get skills section
+  def skills
+    @html.xpath("//span[@class='skill-text']").text
+  end
+  # Get summary
+  def summary
+    @html.xpath("//p[@id='res_summary']").text
+  end
+  # Get additional info
+  def additional_info
+    @html.xpath("//div[@id='additionalinfo-section']//p").text
+  end
+  # Get last updated time
+  def last_updated
+    @html.xpath("//div[@id='resume_actions_contacted']").text.gsub("Updated: ", "")
+  end
+end

data/lib/rec_people.rb ADDED Viewed

@@ -0,0 +1,32 @@
+require 'nokogiri'
+load 'utilities.rb'
+class RecPeople
+  include Utilities
+  def initialize(html)
+    rec_people = html.css(".rec_resume")
+    @rec_people_list = Array.new
+    rec_people.each do |rec_person|
+      @rec_people_list.push({
+                              rec_person_name: rec_person_name(rec_person),
+                              rec_person_link: rec_person_link(rec_person)
+      })
+    end
+  end
+  # Return person info
+  def get_rec_people
+    return @rec_people_list
+  end
+  # Get name of suggested person
+  def rec_person_name(rec_person)
+    rec_person.css("a").text
+  end
+  # Get name of suggested link
+  def rec_person_link(rec_person)
+    rec_person.css("a").first['href']
+  end
+end

data/lib/utilities.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require 'date'
+module Utilities
+  # Parse dates
+  def parse_dates(dates)
+    start_date, end_date = dates
+    if dates.include?(" to ")
+      start_date, end_date = dates.split(" to ")
+    end
+    return date_normalize(start_date), date_normalize(end_date)
+  end
+  def date_normalize(date)
+    begin
+      date = date+"-01-01" if date =~ /^(19|20)\d{2}$/
+      return Date.parse(date)
+    rescue
+      return date
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,55 @@
+--- !ruby/object:Gem::Specification
+name: indeedparser
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+platform: ruby
+authors:
+- M. C. McGrath
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-12-23 00:00:00.000000000 Z
+dependencies: []
+description: Parses Indeed resumes
+email: shidash@transparencytoolkit.org
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/awards.rb
+- lib/certifications.rb
+- lib/degrees.rb
+- lib/groups.rb
+- lib/indeed_parser.rb
+- lib/jobs.rb
+- lib/links.rb
+- lib/military_service.rb
+- lib/personal_info.rb
+- lib/rec_people.rb
+- lib/utilities.rb
+homepage: https://github.com/TransparencyToolkit/indeedparser
+licenses:
+- GPL
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.4.8
+signing_key:
+specification_version: 4
+summary: Parses Indeed resumes
+test_files: []
+has_rdoc: