RubyGems - resume_exporter - Versions diffs - 0.0.1 - Mend

resume_exporter 0.0.1

Files changed (25) hide show

checksums.yaml +7 -0
data/bin/resume_exporter +47 -0
data/lib/exporters/json.rb +16 -0
data/lib/exporters/md.rb +11 -0
data/lib/exporters/txt.rb +11 -0
data/lib/exporters/xml.rb +12 -0
data/lib/exporters/yaml.rb +10 -0
data/lib/extractors/base.rb +32 -0
data/lib/extractors/factory.rb +21 -0
data/lib/extractors/html.rb +40 -0
data/lib/extractors/html/linkedin.rb +271 -0
data/lib/extractors/html/stackoverflow.rb +123 -0
data/lib/extractors/html/xing.rb +105 -0
data/lib/extractors/json.rb +38 -0
data/lib/extractors/json/fresh.rb +314 -0
data/lib/extractors/json/json_resume.rb +178 -0
data/lib/extractors/json/prtflio.rb +91 -0
data/lib/resume_exporter.rb +40 -0
data/lib/templates/default.json.jbuilder +289 -0
data/lib/templates/default.md.erb +407 -0
data/lib/templates/default.txt.erb +383 -0
data/lib/templates/default.xml.builder +287 -0
data/lib/templates/fresh.json.jbuilder +217 -0
data/lib/templates/json_resume.json.jbuilder +103 -0
metadata +208 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: e03168b0fb8cdb1b97b119e5cc6571d6013d5680
+  data.tar.gz: 360d4ae39463d516374a586c06b376d60d1fed9c
+SHA512:
+  metadata.gz: 871738aaa534f937a70ec9c46440615e1b67b56a02a948ced09d5232bf63e80e0da17336590fe11bfd1e7f583c02f794643b294caa65dd2d764cb6956e7097cb
+  data.tar.gz: c891419756f962ab6386f0a850a4ea895cb084f4b84f5fe7fe9ae65445a19f01746f1c9694a77fefac0fd00e41b4180840d9fc8209dd66bdf2e5c2f90a62f5ee

data/bin/resume_exporter ADDED Viewed

@@ -0,0 +1,47 @@
+# Command-line entry point for the resume_exporter gem, built on the commander DSL.
+# It defines a single `export` command (also the default command) that loads a
+# profile file through ResumeExporter and prints the exported document.
+require 'rubygems'
+require 'commander/import'
+require 'resume_exporter'
+program :version, '0.0.1'
+# Program-level help text; the literal below spans several lines on purpose.
+program :description, 'ResumeExporter is a tool to export data from public profile html files.
+Save your profile (e.g. from LinkedIn, Xing, or Stackoverflow) as html and export to json or xml with the help of ResumeExporter.
+Example 1: export resume data from .html file, and export as json:
+    resume_exporter /path/to/your/profile.html
+Example 2: export resume data from .html file, export as json and save to file:
+    resume_exporter /path/to/your/profile.html >> your_file.json
+Example 3: export resume data from .html file, and export as xml
+    resume_exporter /path/to/your/profile.html --format xml
+Example 4: export resume data from .html file, export as xml and save to file
+    resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
+command :export do |c|
+  c.syntax = 'resume_exporter export [options]'
+  c.description = 'export resume data from <file>, and export as json or xml
+          test.'
+  # --format is registered through global_option, not as a command-local option.
+  c.global_option '--format FORMAT', 'Specify the output format (default is json). You can choose json, xml, jsonresume, fresh, md, yaml'
+  c.example 'export resume data from .html file, and export as json', 'resume_exporter /path/to/your/profile.html'
+  c.example 'export resume data from .html file, and export as xml', 'resume_exporter /path/to/your/profile.html --format xml'
+  c.example 'export resume data from .html file, export as xml and save to file', 'resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
+  c.action do |args, options|
+    # The first positional argument is the profile file; abort with a message when it is missing.
+    file = args.shift || abort('file argument required.')
+    # Fall back to JSON when --format was not supplied.
+    options.default :format  => "json"
+    r = ResumeExporter.new(file)
+    # Hand the export result to commander's `say` helper (presumably writes it to stdout).
+    say r.export(format: options.format)
+  end
+end
+# Run `export` when the user does not name a command explicitly.
+default_command :export

data/lib/exporters/json.rb ADDED Viewed

@@ -0,0 +1,16 @@
+require 'tilt/jbuilder.rb'
+require 'multi_json'
+# Process-wide JSON settings, applied once when this file is loaded.
+# Select the yajl adapter for MultiJson.
+MultiJson.use :yajl
+# Ask MultiJson to dump with the :pretty option enabled.
+MultiJson.dump_options = {:pretty=>true}
+# Have Jbuilder format keys with camelize(:lower) (e.g. start_date -> startDate).
+Jbuilder.key_format camelize: :lower
+module Exporter
+  # JSON exporter: renders resume data through one of the bundled
+  # "<template>.json.jbuilder" templates.
+  module Json
+    class << self
+      # Renders the resume as JSON.
+      #
+      # options[:data]     - resume data, stored in @data on this module before rendering
+      # options[:template] - template base name; "default" when absent or nil
+      #
+      # Returns whatever the Jbuilder template renders. The module itself is
+      # passed as the render scope.
+      def export(options = {})
+        template_name = options[:template] || "default"
+        @data = options[:data]
+        template_path = File.expand_path("../templates/#{template_name}.json.jbuilder", __dir__)
+        renderer = Tilt::JbuilderTemplate.new(template_path)
+        renderer.render(self)
+      end
+    end
+  end
+end

data/lib/exporters/md.rb ADDED Viewed

@@ -0,0 +1,11 @@
+require 'tilt'
+require 'erb'
+module Exporter
+  # Markdown exporter backed by the bundled default.md.erb template.
+  module Md
+    class << self
+      # Renders the resume as Markdown.
+      #
+      # options[:data] - resume data, stored in @data on this module before rendering
+      #
+      # Returns the rendered template output. The module itself is the render scope.
+      def export(options = {})
+        @data = options[:data]
+        template_path = File.expand_path("../templates/default.md.erb", __dir__)
+        renderer = Tilt.new(template_path)
+        renderer.render(self)
+      end
+    end
+  end
+end

data/lib/exporters/txt.rb ADDED Viewed

@@ -0,0 +1,11 @@
+require 'tilt'
+require 'erb'
+module Exporter
+  # Plain-text exporter backed by the bundled default.txt.erb template.
+  module Txt
+    class << self
+      # Renders the resume as plain text.
+      #
+      # options[:data] - resume data, stored in @data on this module before rendering
+      #
+      # Returns the rendered template output. The module itself is the render scope.
+      def export(options = {})
+        @data = options[:data]
+        template_path = File.expand_path("../templates/default.txt.erb", __dir__)
+        renderer = Tilt.new(template_path)
+        renderer.render(self)
+      end
+    end
+  end
+end

data/lib/exporters/xml.rb ADDED Viewed

@@ -0,0 +1,12 @@
+require 'tilt'
+require 'builder'
+module Exporter
+  # XML exporter: renders resume data through a "<template>.xml.builder" template.
+  module Xml
+    # Template used when the caller does not name one. default.xml.builder is the
+    # only XML template shipped in lib/templates; the previous fallback ("prtflio")
+    # pointed at a file that does not exist there.
+    DEFAULT_TEMPLATE = "default".freeze
+    # Renders the resume as XML.
+    #
+    # options[:data]     - resume data, stored in @data on this module before rendering
+    # options[:template] - template base name; DEFAULT_TEMPLATE when absent or nil
+    #
+    # Returns the rendered template output. The module itself is the render scope.
+    def self.export(options = {})
+      template = options[:template] || DEFAULT_TEMPLATE
+      @data = options[:data]
+      Tilt.new(File.expand_path("../templates/#{template}.xml.builder", __dir__)).render(self)
+    end
+  end
+end

data/lib/exporters/yaml.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require 'yaml'
+module Exporter
+  # YAML exporter: serialises the resume data directly, without a template.
+  module Yaml
+    class << self
+      # Dumps the resume as YAML.
+      #
+      # options[:data] - resume data, stored in @data on this module
+      #
+      # Returns the String produced by calling #to_yaml on the data.
+      def export(options = {})
+        @data = options[:data]
+        yaml_text = @data.to_yaml
+        yaml_text
+      end
+    end
+  end
+end

data/lib/extractors/base.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module Extractor
+  # Shared parent of the file-based extractors.
+  class Base
+    # Names of the top-level resume sections an extractor may provide, in the
+    # order they are emitted. Returns a fresh Array of Strings on every call.
+    def attributes
+      %w[
+        meta basics employment education projects openSource
+        skills qualifications recognition writing reading speaking
+        patents languages interests extracurriculars affiliations governance
+        service references disposition location samples testimonials
+      ]
+    end
+  end
+end

data/lib/extractors/factory.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require "extractors/html"
+require "extractors/json"
+module Extractor
+  # Chooses the extractor class that matches a profile file's extension.
+  module Factory
+    # Builds the extractor for +file_path+.
+    #
+    # file_path - path to an existing .html/.htm or .json file; the extension
+    #             is matched case-insensitively, so "PROFILE.HTML" is accepted.
+    #
+    # Returns an Extractor::Html or Extractor::Json built from the path.
+    # Raises RuntimeError "File not found" when the path is not a regular file.
+    # Raises RuntimeError "File type not supported. Only .html or .json" for
+    # any other extension.
+    def self.extractor_for(file_path)
+      raise "File not found" unless File.file?(file_path)
+      # Downcase so extensions such as ".HTML" or ".Json" are accepted too.
+      case File.extname(file_path).downcase
+      when ".html", ".htm"
+        Extractor::Html.new(file_path)
+      when ".json"
+        Extractor::Json.new(file_path)
+      else
+        raise "File type not supported. Only .html or .json"
+      end
+    end
+  end
+end

data/lib/extractors/html.rb ADDED Viewed

@@ -0,0 +1,40 @@
+require "nokogiri"
+require "extractors/base"
+require "extractors/html/linkedin"
+require "extractors/html/stackoverflow"
+require "extractors/html/xing"
+module Extractor
+  # Extractor for saved profile pages. Detects which site the page came from by
+  # the hosts referenced in its <link> elements and delegates to the matching
+  # site-specific extractor.
+  class Html < Base
+    # file_path - path of the HTML file; it is parsed as UTF-8 with Nokogiri.
+    def initialize(file_path)
+      @doc = File.open(file_path) { |f| Nokogiri::HTML(f, nil, 'utf-8') }
+    end
+    # Collects every section the site-specific extractor implements.
+    #
+    # Returns a Hash keyed by section name (Symbol), in the order given by
+    # #attributes. The Hash is empty when the page matches no supported site.
+    def extract
+      extractor = site_extractor
+      attributes.each_with_object({}) do |attr, sections|
+        section = attr.to_sym
+        # nil responds to none of the section names, so an unrecognised page
+        # simply yields an empty Hash.
+        sections[section] = extractor.send(section) if extractor.respond_to?(section)
+      end
+    end
+    # True when any <link> href references LinkedIn's asset host.
+    def is_linkedin?
+      links_to?("licdn.com")
+    end
+    # True when any <link> href references xing.com.
+    def is_xing?
+      links_to?("xing.com")
+    end
+    # True when any <link> href references Stack Overflow's asset host.
+    def is_stackoverflow?
+      links_to?("sstatic.net")
+    end
+    private
+    # Picks the site-specific extractor, checking LinkedIn, then Xing, then
+    # Stack Overflow. Returns nil when none of them matches.
+    def site_extractor
+      if is_linkedin?
+        Extractor::Linkedin.new(@doc)
+      elsif is_xing?
+        Extractor::Xing.new(@doc)
+      elsif is_stackoverflow?
+        Extractor::Stackoverflow.new(@doc)
+      end
+    end
+    # True when any <link> element's href contains +host+. A <link> without an
+    # href attribute yields nil, so coerce to String instead of raising
+    # NoMethodError on such elements.
+    def links_to?(host)
+      @doc.css("link").any? { |link| link["href"].to_s.include?(host) }
+    end
+  end
+end

data/lib/extractors/html/linkedin.rb ADDED Viewed

@@ -0,0 +1,271 @@
+require "nokogiri"
+require "uri"
+module Extractor
+  # Reads resume sections out of a saved LinkedIn public-profile page.
+  # Each public method returns one section; a field is left out of a record
+  # when the page has no matching element.
+  class Linkedin
+    # doc - parsed document responding to #css and #at_css (Extractor::Html
+    #       passes the Nokogiri document it parsed).
+    def initialize(doc)
+      @doc = doc
+    end
+    # Returns the "basics" section: name, headline, picture, summary and contact details.
+    def basics
+      {
+        name: name,
+        label: label,
+        image: image,
+        summary: summary,
+        contact: {
+          website: website,
+          location: location
+        }
+      }
+    end
+    # Full name, or nil when the page has no usable #name element.
+    # Single-word names come back without a trailing space.
+    def name
+      parts = [first_name, last_name].compact
+      parts.join(" ") unless parts.empty?
+    end
+    # First whitespace-separated word of #name, or nil.
+    def first_name
+      name_parts[0]
+    end
+    # Everything after the first word of #name, or nil when there is only one
+    # word (previously this raised NoMethodError on nil).
+    def last_name
+      name_parts[1]
+    end
+    # Headline with every whitespace run collapsed to a single space, or nil.
+    def label
+      headline = text_at(@doc, '.headline.title')
+      headline.gsub(/\s+/, ' ').strip if headline
+    end
+    # src attribute of the profile picture, or nil.
+    def image
+      picture = @doc.at_css('.profile-picture img')
+      picture['src'] if picture
+    end
+    # Text of the .locality element, memoised in @location; nil when absent.
+    def location
+      @location ||= text_at(@doc, '.locality')
+    end
+    # First website link with LinkedIn's redirect wrapper removed, or nil.
+    def website
+      anchor = @doc.at_css('.extra-info .websites li a')
+      clean_up_linkedin_redirect_url(anchor['href']) if anchor
+    end
+    # Summary text with each whitespace character replaced by a space, or nil.
+    def summary
+      description = text_at(@doc, '#summary .description')
+      description.gsub(/[[:space:]]/, ' ').strip if description
+    end
+    # Returns { history: [...] } with one record per #experience position.
+    def employment
+      positions = @doc.css('#experience .position').map do |item|
+        entry = {}
+        put_text(entry, :position, item, ".item-title")
+        put_text(entry, :employer, item, ".item-subtitle")
+        put_date_range(entry, item)
+        put_text(entry, :summary, item, ".description")
+        entry
+      end
+      { history: positions }
+    end
+    # Returns { history: [...] } with one record per #education school.
+    def education
+      schools = @doc.css('#education .school').map do |item|
+        entry = {}
+        put_text(entry, :institution, item, ".item-title")
+        subtitle = text_at(item, ".item-subtitle span")
+        if subtitle
+          parts = subtitle.split(", ")
+          if parts.length == 3
+            entry[:degree], entry[:fieldOfStudy], entry[:grade] = parts
+          else
+            # Keep the subtitle as text. Storing the split Array here made
+            # :fieldOfStudy an Array for some schools and a String for others.
+            entry[:fieldOfStudy] = subtitle
+          end
+        end
+        # The last paragraph that is not the "Activities and Societies" line wins.
+        item.css(".description p").each do |paragraph|
+          entry[:summary] = paragraph.text unless paragraph.text.include?("Activities and Societies: ")
+        end
+        put_date_range(entry, item)
+        entry
+      end
+      { history: schools }
+    end
+    # Returns { history: [...] } with one record per #projects project.
+    def projects
+      items = @doc.css('#projects .project').map do |item|
+        entry = {}
+        put_text(entry, :title, item, ".item-title")
+        put_link(entry, :url, item, ".item-title a")
+        put_date_range(entry, item)
+        put_text(entry, :description, item, ".description")
+        entry
+      end
+      { history: items }
+    end
+    # Returns { sets: [...] } with one { name: } record per listed skill.
+    def skills
+      { sets: @doc.css('#skills .skill a').map { |item| { name: item.text } } }
+    end
+    # Returns { history: [...] }: certifications, then courses, then test scores.
+    def qualifications
+      { history: [certification_entries, course_entries, test_score_entries].flatten }
+    end
+    # Returns { history: [...] } with one record per #awards award.
+    def recognition
+      awards = @doc.css('#awards .award').map do |item|
+        entry = { category: "Award" }
+        put_text(entry, :title, item, ".item-title")
+        put_text(entry, :from, item, ".item-subtitle")
+        put_date_range(entry, item)
+        put_text(entry, :summary, item, ".description")
+        entry
+      end
+      { history: awards }
+    end
+    # Returns { history: [...] } with one record per #publications publication.
+    def writing
+      publications = @doc.css('#publications .publication').map do |item|
+        entry = {}
+        put_text(entry, :title, item, ".item-title")
+        put_link(entry, :url, item, ".item-title a")
+        put_text(entry, :publisher, item, ".item-subtitle")
+        # Guard with at_css: css returns a collection that is truthy even when
+        # empty, which used to store an empty :date for undated publications.
+        entry[:date] = item.css(".date-range time").text if item.at_css(".date-range time")
+        put_text(entry, :summary, item, ".description")
+        # publication[:authors] = item.css(".contributors .contributor").map { |c| c.text }.join("") if item.at_css(".contributors .contributor")
+        entry
+      end
+      { history: publications }
+    end
+    # Returns { history: [...] } with one record per #patents patent.
+    def patents
+      items = @doc.css('#patents .patent').map do |item|
+        entry = {}
+        put_text(entry, :title, item, ".item-title")
+        put_link(entry, :url, item, ".item-title a")
+        put_text(entry, :date, item, ".date-range time")
+        range = item.at_css(".date-range")
+        # Status is whatever text sits in .date-range outside its <time> children.
+        entry[:status] = range.children.reject { |node| node.name == "time" }.map(&:text).join.strip if range
+        put_text(entry, :description, item, ".description")
+        entry
+      end
+      { history: items }
+    end
+    # Returns { list: [...] } with one { language:, level: } record per language.
+    def languages
+      items = @doc.css('#languages .language').map do |item|
+        entry = {}
+        put_text(entry, :language, item, ".name")
+        put_text(entry, :level, item, ".proficiency")
+        entry
+      end
+      { list: items }
+    end
+    # Returns { list: [...] } with one { name: } record per interest.
+    def interests
+      { list: @doc.css('#interests .interest span').map { |item| { name: item.text } } }
+    end
+    # Returns { history: [...] } with one record per #organizations list item.
+    def affiliations
+      organizations = @doc.css('#organizations li').map do |item|
+        entry = {}
+        put_text(entry, :organization, item, ".item-title")
+        role = text_at(item, ".item-subtitle")
+        entry[:roles] = [role] if role
+        put_date_range(entry, item)
+        put_text(entry, :summary, item, ".description")
+        entry
+      end
+      { history: organizations }
+    end
+    # Returns { history: [...] } with one record per #volunteering position.
+    def service
+      positions = @doc.css('#volunteering .position').map do |item|
+        entry = { category: "Volunteer Work" }
+        role = text_at(item, ".item-title")
+        entry[:roles] = [role] if role
+        put_text(entry, :organization, item, ".item-subtitle")
+        # volunteering[:cause] = item.at_css(".cause").text if item.at_css(".cause")
+        put_date_range(entry, item)
+        put_text(entry, :summary, item, ".description")
+        entry
+      end
+      { history: positions }
+    end
+    private
+    # Stripped [first, rest] words of #name; empty Array when #name is missing or blank.
+    def name_parts
+      raw = text_at(@doc, '#name')
+      raw ? raw.strip.split(' ', 2).map(&:strip) : []
+    end
+    # Text of the first +selector+ match under +node+, or nil when nothing matches.
+    def text_at(node, selector)
+      match = node.at_css(selector)
+      match.text if match
+    end
+    # Stores the text of the first +selector+ match under +key+; leaves
+    # +record+ untouched when nothing matches.
+    def put_text(record, key, item, selector)
+      value = text_at(item, selector)
+      record[key] = value unless value.nil?
+    end
+    # Stores the cleaned href of the first +selector+ match under +key+;
+    # leaves +record+ untouched when nothing matches.
+    def put_link(record, key, item, selector)
+      anchor = item.at_css(selector)
+      record[key] = clean_up_linkedin_redirect_url(anchor["href"]) if anchor
+    end
+    # Copies the first and second <time> inside .date-range into :startDate and :endDate.
+    def put_date_range(record, item)
+      dates = item.css(".date-range time")
+      record[:startDate] = dates[0].text if dates[0]
+      record[:endDate] = dates[1].text if dates[1]
+    end
+    # One "Certification" record per #certifications entry.
+    def certification_entries
+      @doc.css('#certifications .certification').map do |item|
+        entry = { category: "Certification" }
+        put_text(entry, :title, item, ".item-title")
+        put_text(entry, :from, item, ".item-subtitle")
+        put_link(entry, :url, item, ".item-title a")
+        put_date_range(entry, item)
+        entry
+      end
+    end
+    # One "Course" record per #courses entry. The title is skipped when the
+    # entry has no <span>; this used to raise NoMethodError on nil.
+    def course_entries
+      @doc.css('#courses .course').map do |item|
+        entry = { category: "Course" }
+        put_text(entry, :title, item, "span")
+        entry
+      end
+    end
+    # One "Test Score" record per #scores entry.
+    def test_score_entries
+      @doc.css('#scores .score').map do |item|
+        entry = { category: "Test Score" }
+        put_text(entry, :title, item, ".item-title")
+        # score[:score] = item.at_css(".item-subtitle").text.gsub(/[^0-9]/, "") if item.at_css(".item-subtitle")
+        put_date_range(entry, item)
+        put_text(entry, :summary, item, ".description")
+        entry
+      end
+    end
+    # Unwraps LinkedIn "/redir/redirect?url=..." links to the target URL.
+    #
+    # link - href String or nil
+    #
+    # Returns the decoded value of the "url" query parameter for redirect
+    # links, or +link+ unchanged when it is nil, is not a redirect link, has no
+    # "url" parameter, or carries a query string that cannot be decoded.
+    def clean_up_linkedin_redirect_url(link)
+      return link unless link && link.include?("/redir/redirect")
+      query = URI(link).query
+      return link unless query
+      # URI.unescape was removed in Ruby 3.0; decode_www_form both splits the
+      # query string and percent-decodes each value.
+      pair = URI.decode_www_form(query).find { |key, _value| key == "url" }
+      pair ? pair[1] : link
+    rescue ArgumentError
+      # Undecodable query string: keep the original link.
+      link
+    end
+  end
+end