resume_exporter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/resume_exporter +47 -0
- data/lib/exporters/json.rb +16 -0
- data/lib/exporters/md.rb +11 -0
- data/lib/exporters/txt.rb +11 -0
- data/lib/exporters/xml.rb +12 -0
- data/lib/exporters/yaml.rb +10 -0
- data/lib/extractors/base.rb +32 -0
- data/lib/extractors/factory.rb +21 -0
- data/lib/extractors/html.rb +40 -0
- data/lib/extractors/html/linkedin.rb +271 -0
- data/lib/extractors/html/stackoverflow.rb +123 -0
- data/lib/extractors/html/xing.rb +105 -0
- data/lib/extractors/json.rb +38 -0
- data/lib/extractors/json/fresh.rb +314 -0
- data/lib/extractors/json/json_resume.rb +178 -0
- data/lib/extractors/json/prtflio.rb +91 -0
- data/lib/resume_exporter.rb +40 -0
- data/lib/templates/default.json.jbuilder +289 -0
- data/lib/templates/default.md.erb +407 -0
- data/lib/templates/default.txt.erb +383 -0
- data/lib/templates/default.xml.builder +287 -0
- data/lib/templates/fresh.json.jbuilder +217 -0
- data/lib/templates/json_resume.json.jbuilder +103 -0
- metadata +208 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e03168b0fb8cdb1b97b119e5cc6571d6013d5680
|
4
|
+
data.tar.gz: 360d4ae39463d516374a586c06b376d60d1fed9c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 871738aaa534f937a70ec9c46440615e1b67b56a02a948ced09d5232bf63e80e0da17336590fe11bfd1e7f583c02f794643b294caa65dd2d764cb6956e7097cb
|
7
|
+
data.tar.gz: c891419756f962ab6386f0a850a4ea895cb084f4b84f5fe7fe9ae65445a19f01746f1c9694a77fefac0fd00e41b4180840d9fc8209dd66bdf2e5c2f90a62f5ee
|
data/bin/resume_exporter
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'commander/import'
|
3
|
+
require 'resume_exporter'
|
4
|
+
|
5
|
+
# Command-line entry point: declares the program metadata and a single
# `export` command (also the default command) using the Commander DSL.
program :version, '0.0.1'
program :description, 'ResumeExporter is a tool to export data from public profile html files.
Save your profile (e.g. from LinkedIn, Xing, or Stackoverflow) as html and export to json or xml with the help of ResumeExporter.

Example 1: export resume data from .html file, and export as json:

resume_exporter /path/to/your/profile.html


Example 2: export resume data from .html file, export as json and save to file:

resume_exporter /path/to/your/profile.html >> your_file.json


Example 3: export resume data from .html file, and export as xml

resume_exporter /path/to/your/profile.html --format xml


Example 4: export resume data from .html file, export as xml and save to file

resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'

command :export do |c|
  c.syntax = 'resume_exporter export [options]'
  # The description previously ended with a stray "test." line that leaked
  # into the generated help text.
  c.description = 'export resume data from <file>, and export as json or xml.'
  c.global_option '--format FORMAT', 'Specify the output format (default is json). You can choose json, xml, jsonresume, fresh, md, yaml'
  c.example 'export resume data from .html file, and export as json', 'resume_exporter /path/to/your/profile.html'
  c.example 'export resume data from .html file, and export as xml', 'resume_exporter /path/to/your/profile.html --format xml'
  c.example 'export resume data from .html file, export as xml and save to file', 'resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
  c.action do |args, options|
    # The first positional argument is the profile file; abort when missing.
    file = args.shift || abort('file argument required.')

    # Fall back to json when --format was not supplied.
    options.default format: 'json'

    exporter = ResumeExporter.new(file)

    say exporter.export(format: options.format)
  end
end

# Running the executable without a sub-command behaves like `export`.
default_command :export
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'tilt/jbuilder.rb'
|
2
|
+
require 'multi_json'
|
3
|
+
# Library-wide JSON settings applied when this file is loaded:
# select the yajl adapter, pretty-print dumps, and emit lowerCamelCase keys.
MultiJson.use(:yajl)
MultiJson.dump_options = { pretty: true }
Jbuilder.key_format(camelize: :lower)
|
6
|
+
|
7
|
+
|
8
|
+
module Exporter
  # Renders resume data as JSON through a Jbuilder template.
  module Json
    # Renders the chosen template and returns the result of the render.
    #
    # options[:template] - template base name; "default" when absent.
    # options[:data]     - resume data, stored in @data on this module; the
    #                      module itself is passed as the render scope
    #                      (presumably so the template can read @data).
    def self.export(options = {})
      @data = options[:data]
      template_name = options[:template] || "default"
      template_path = File.expand_path("../templates/#{template_name}.json.jbuilder", __dir__)
      Tilt::JbuilderTemplate.new(template_path).render(self)
    end
  end
end
|
data/lib/exporters/md.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'tilt'
|
2
|
+
require 'builder'
|
3
|
+
|
4
|
+
module Exporter
  # Renders resume data as XML through a Builder template loaded via Tilt.
  module Xml
    # Renders the chosen template and returns the result of the render.
    #
    # options[:template] - template base name. Defaults to "default", the
    #                      only .xml.builder template shipped with the gem
    #                      (the earlier "prtflio" default named a template
    #                      file that is not part of the package).
    # options[:data]     - resume data, stored in @data on this module; the
    #                      module itself is passed as the render scope.
    def self.export(options = {})
      template = options[:template] || "default"
      @data = options[:data]
      template_path = File.expand_path("../templates/#{template}.xml.builder", __dir__)
      Tilt.new(template_path).render(self)
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Extractor
  # Parent class for extractors; it only supplies the list of resume
  # section names.
  class Base
    # Returns a new Array of section names (Strings) on every call.
    def attributes
      %w[
        meta basics employment education projects openSource
        skills qualifications recognition writing reading speaking
        patents languages interests extracurriculars affiliations governance
        service references disposition location samples testimonials
      ]
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "extractors/html"
|
2
|
+
require "extractors/json"
|
3
|
+
|
4
|
+
module Extractor
  # Picks the extractor that matches a resume file's extension.
  module Factory
    # Builds the extractor for +file_path+.
    #
    # The extension is compared case-insensitively, so "profile.HTML" and
    # "resume.JSON" are accepted alongside their lower-case forms.
    #
    # Returns an Extractor::Html for .html/.htm files and an Extractor::Json
    # for .json files.
    # Raises RuntimeError ("File not found") when the path is not an
    # existing regular file, and RuntimeError ("File type not supported.
    # Only .html or .json") for any other extension.
    def self.extractor_for(file_path)
      raise "File not found" unless File.file?(file_path)

      case File.extname(file_path).downcase
      when ".html", ".htm"
        Extractor::Html.new(file_path)
      when ".json"
        Extractor::Json.new(file_path)
      else
        raise "File type not supported. Only .html or .json"
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "extractors/base"
|
3
|
+
require "extractors/html/linkedin"
|
4
|
+
require "extractors/html/stackoverflow"
|
5
|
+
require "extractors/html/xing"
|
6
|
+
|
7
|
+
module Extractor
  # Extracts resume data from a saved profile page by detecting which site
  # produced it and delegating to the matching site-specific extractor.
  class Html < Base
    # Parses the file at +file_path+ as UTF-8 HTML and keeps the document.
    def initialize(file_path)
      @doc = File.open(file_path) { |f| Nokogiri::HTML(f, nil, 'utf-8') }
    end

    # Returns a Hash keyed by section name (Symbol) holding the value of
    # every section in +attributes+ that the site extractor responds to.
    # When the page matches none of the known sites the Hash is empty.
    def extract
      extractor = site_extractor
      return {} unless extractor

      attributes.each_with_object({}) do |attr, sections|
        section = attr.to_sym
        sections[section] = extractor.public_send(section) if extractor.respond_to?(section)
      end
    end

    # True when any <link> href mentions LinkedIn's asset host.
    def is_linkedin?
      link_href_includes?("licdn.com")
    end

    # True when any <link> href mentions xing.com.
    def is_xing?
      link_href_includes?("xing.com")
    end

    # True when any <link> href mentions Stack Overflow's asset host.
    def is_stackoverflow?
      link_href_includes?("sstatic.net")
    end

    private

    # Chooses the site-specific extractor, checking LinkedIn, then Xing,
    # then Stack Overflow; nil when none of the checks match.
    def site_extractor
      if is_linkedin?
        Extractor::Linkedin.new(@doc)
      elsif is_xing?
        Extractor::Xing.new(@doc)
      elsif is_stackoverflow?
        Extractor::Stackoverflow.new(@doc)
      end
    end

    # Checks every <link> element's href for +fragment+. A <link> without
    # an href attribute yields nil, so the value is coerced with to_s to
    # avoid calling include? on nil.
    def link_href_includes?(fragment)
      @doc.css("link").any? { |link| link["href"].to_s.include?(fragment) }
    end
  end
end
|
@@ -0,0 +1,271 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "uri"
|
3
|
+
|
4
|
+
module Extractor
  # Reads resume sections out of a parsed LinkedIn public-profile page.
  #
  # The constructor takes an already-parsed document; the class only relies
  # on +css+ / +at_css+ on the document and on +text+, +[]+, +children+ and
  # +name+ on the nodes it finds. Every section method returns plain Hashes,
  # Arrays and Strings, and a key is only present when the corresponding
  # element exists in the page.
  class Linkedin
    def initialize(doc)
      @doc = doc
    end

    # Headline data: name, label, image, summary and contact details.
    def basics
      {
        name: name,
        label: label,
        image: image,
        summary: summary,
        contact: {
          website: website,
          location: location
        }
      }
    end

    # Full name, or nil when the page has no usable #name element.
    # A single-word name is returned as-is (no trailing space).
    def name
      full_name = [first_name, last_name].compact.join(" ")
      full_name unless full_name.empty?
    end

    # First whitespace-separated word of #name, or nil.
    def first_name
      name_parts[0]
    end

    # Everything after the first word of #name, or nil when the name has a
    # single word (this used to raise NoMethodError).
    def last_name
      name_parts[1]
    end

    # Headline text with runs of whitespace collapsed to single spaces.
    def label
      node = @doc.at_css('.headline.title')
      node.text.gsub(/\s+/, ' ').strip if node
    end

    # src attribute of the profile picture, or nil.
    def image
      node = @doc.at_css('.profile-picture img')
      node['src'] if node
    end

    # Text of the .locality element (memoized), or nil.
    def location
      @location ||= text_of(@doc, '.locality')
    end

    # First listed website with any LinkedIn redirect wrapper removed.
    def website
      anchor = @doc.at_css('.extra-info .websites li a')
      clean_up_linkedin_redirect_url(anchor['href']) if anchor
    end

    # Summary text with every whitespace character replaced by a space.
    def summary
      node = @doc.at_css('#summary .description')
      node.text.gsub(/[[:space:]]/, ' ').strip if node
    end

    def employment
      history = @doc.css('#experience .position').map do |item|
        entry = {}
        add_text(entry, :position, item, ".item-title")
        add_text(entry, :employer, item, ".item-subtitle")
        add_dates(entry, item)
        add_text(entry, :summary, item, ".description")
        entry
      end
      { history: history }
    end

    def education
      history = @doc.css('#education .school').map do |item|
        entry = {}
        add_text(entry, :institution, item, ".item-title")
        add_education_subtitle(entry, item)

        # The last paragraph that is not the "Activities and Societies"
        # line wins as the summary.
        item.css(".description p").each do |paragraph|
          entry[:summary] = paragraph.text unless paragraph.text.include?("Activities and Societies: ")
        end

        add_dates(entry, item)
        entry
      end
      { history: history }
    end

    def projects
      history = @doc.css('#projects .project').map do |item|
        entry = {}
        add_text(entry, :title, item, ".item-title")
        add_url(entry, item)
        add_dates(entry, item)
        add_text(entry, :description, item, ".description")
        entry
      end
      { history: history }
    end

    def skills
      { sets: @doc.css('#skills .skill a').map { |item| { name: item.text } } }
    end

    # Certifications, courses and test scores, in that order, in one list.
    def qualifications
      { history: certifications + courses + test_scores }
    end

    def recognition
      history = @doc.css('#awards .award').map do |item|
        entry = { category: "Award" }
        add_text(entry, :title, item, ".item-title")
        add_text(entry, :from, item, ".item-subtitle")
        add_dates(entry, item)
        add_text(entry, :summary, item, ".description")
        entry
      end
      { history: history }
    end

    def writing
      history = @doc.css('#publications .publication').map do |item|
        entry = {}
        add_text(entry, :title, item, ".item-title")
        add_url(entry, item)
        add_text(entry, :publisher, item, ".item-subtitle")

        # css always returns a (possibly empty) collection, so test for
        # emptiness instead of truthiness; otherwise :date would be "" for
        # publications without a date.
        times = item.css(".date-range time")
        entry[:date] = times.map(&:text).join unless times.empty?

        add_text(entry, :summary, item, ".description")
        # Contributors (.contributors .contributor) are currently not extracted.
        entry
      end
      { history: history }
    end

    def patents
      history = @doc.css('#patents .patent').map do |item|
        entry = {}
        add_text(entry, :title, item, ".item-title")
        add_url(entry, item)
        add_text(entry, :date, item, ".date-range time")

        # Status is whatever text sits in .date-range outside <time> nodes.
        range = item.at_css(".date-range")
        if range
          entry[:status] = range.children.reject { |child| child.name == "time" }.map(&:text).join.strip
        end

        add_text(entry, :description, item, ".description")
        entry
      end
      { history: history }
    end

    def languages
      list = @doc.css('#languages .language').map do |item|
        entry = {}
        add_text(entry, :language, item, ".name")
        add_text(entry, :level, item, ".proficiency")
        entry
      end
      { list: list }
    end

    def interests
      { list: @doc.css('#interests .interest span').map { |item| { name: item.text } } }
    end

    def affiliations
      history = @doc.css('#organizations li').map do |item|
        entry = {}
        add_text(entry, :organization, item, ".item-title")
        role = text_of(item, ".item-subtitle")
        entry[:roles] = [role] if role
        add_dates(entry, item)
        add_text(entry, :summary, item, ".description")
        entry
      end
      { history: history }
    end

    def service
      history = @doc.css('#volunteering .position').map do |item|
        entry = { category: "Volunteer Work" }
        role = text_of(item, ".item-title")
        entry[:roles] = [role] if role
        add_text(entry, :organization, item, ".item-subtitle")
        # The cause (.cause) is currently not extracted.
        add_dates(entry, item)
        add_text(entry, :summary, item, ".description")
        entry
      end
      { history: history }
    end

    private

    # Splits the #name text into [first, rest]; [] when there is no #name
    # element or it is blank.
    def name_parts
      node = @doc.at_css('#name')
      return [] unless node

      node.text.strip.split(' ', 2).map(&:strip)
    end

    # Text of the first node under +scope+ matching +selector+, or nil.
    def text_of(scope, selector)
      node = scope.at_css(selector)
      node.text if node
    end

    # Sets entry[key] to the matched node's text, only when a node matches.
    def add_text(entry, key, item, selector)
      value = text_of(item, selector)
      entry[key] = value unless value.nil?
    end

    # Sets :startDate / :endDate from the first and second <time> node of
    # the item's date range, each only when present.
    def add_dates(entry, item)
      times = item.css(".date-range time")
      entry[:startDate] = times[0].text if times[0]
      entry[:endDate] = times[1].text if times[1]
    end

    # Sets :url from the title link, unwrapping LinkedIn redirects.
    def add_url(entry, item)
      anchor = item.at_css(".item-title a")
      entry[:url] = clean_up_linkedin_redirect_url(anchor["href"]) if anchor
    end

    # Splits the school subtitle on ", ". Exactly three parts are read as
    # degree / field of study / grade.
    # NOTE(review): for any other number of parts :fieldOfStudy receives the
    # whole Array of parts (kept as-is) — confirm the templates expect that.
    def add_education_subtitle(entry, item)
      raw = text_of(item, ".item-subtitle span")
      return unless raw

      parts = raw.split(", ")
      if parts.length == 3
        entry[:degree] = parts[0]
        entry[:fieldOfStudy] = parts[1]
        entry[:grade] = parts[2]
      else
        entry[:fieldOfStudy] = parts
      end
    end

    def certifications
      @doc.css('#certifications .certification').map do |item|
        entry = { category: "Certification" }
        add_text(entry, :title, item, ".item-title")
        add_text(entry, :from, item, ".item-subtitle")
        add_url(entry, item)
        add_dates(entry, item)
        entry
      end
    end

    def courses
      @doc.css('#courses .course').map do |item|
        entry = { category: "Course" }
        # Guarded: a course without a <span> used to raise NoMethodError.
        add_text(entry, :title, item, "span")
        entry
      end
    end

    def test_scores
      @doc.css('#scores .score').map do |item|
        entry = { category: "Test Score" }
        add_text(entry, :title, item, ".item-title")
        # The numeric score (.item-subtitle) is currently not extracted.
        add_dates(entry, item)
        add_text(entry, :summary, item, ".description")
        entry
      end
    end

    # Returns the target of a LinkedIn "/redir/redirect?url=..." link, or
    # +link+ unchanged when it is nil, not a redirect, has no "url"
    # parameter, or cannot be parsed.
    #
    # Uses URI.decode_www_form because URI.unescape was removed in Ruby 3.0.
    def clean_up_linkedin_redirect_url(link)
      return link unless link && link.include?("/redir/redirect")

      query = URI(link).query
      return link unless query

      pair = URI.decode_www_form(query).assoc("url")
      pair ? pair.last : link
    rescue URI::InvalidURIError, ArgumentError
      # Malformed URL or percent-encoding: keep the link as found.
      link
    end
  end
end
|