resume_exporter 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/resume_exporter +47 -0
- data/lib/exporters/json.rb +16 -0
- data/lib/exporters/md.rb +11 -0
- data/lib/exporters/txt.rb +11 -0
- data/lib/exporters/xml.rb +12 -0
- data/lib/exporters/yaml.rb +10 -0
- data/lib/extractors/base.rb +32 -0
- data/lib/extractors/factory.rb +21 -0
- data/lib/extractors/html.rb +40 -0
- data/lib/extractors/html/linkedin.rb +271 -0
- data/lib/extractors/html/stackoverflow.rb +123 -0
- data/lib/extractors/html/xing.rb +105 -0
- data/lib/extractors/json.rb +38 -0
- data/lib/extractors/json/fresh.rb +314 -0
- data/lib/extractors/json/json_resume.rb +178 -0
- data/lib/extractors/json/prtflio.rb +91 -0
- data/lib/resume_exporter.rb +40 -0
- data/lib/templates/default.json.jbuilder +289 -0
- data/lib/templates/default.md.erb +407 -0
- data/lib/templates/default.txt.erb +383 -0
- data/lib/templates/default.xml.builder +287 -0
- data/lib/templates/fresh.json.jbuilder +217 -0
- data/lib/templates/json_resume.json.jbuilder +103 -0
- metadata +208 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e03168b0fb8cdb1b97b119e5cc6571d6013d5680
|
4
|
+
data.tar.gz: 360d4ae39463d516374a586c06b376d60d1fed9c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 871738aaa534f937a70ec9c46440615e1b67b56a02a948ced09d5232bf63e80e0da17336590fe11bfd1e7f583c02f794643b294caa65dd2d764cb6956e7097cb
|
7
|
+
data.tar.gz: c891419756f962ab6386f0a850a4ea895cb084f4b84f5fe7fe9ae65445a19f01746f1c9694a77fefac0fd00e41b4180840d9fc8209dd66bdf2e5c2f90a62f5ee
|
data/bin/resume_exporter
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'commander/import'
|
3
|
+
require 'resume_exporter'
|
4
|
+
|
5
|
+
# Command-line front end built on the commander DSL (loaded through
# 'commander/import'). It declares a single `export` command, which is also
# the default command, so `resume_exporter FILE` behaves like
# `resume_exporter export FILE`.
program :version, '0.0.1'
program :description, 'ResumeExporter is a tool to export data from public profile html files.
Save your profile (e.g. from LinkedIn, Xing, or Stackoverflow) as html and export to json or xml with the help of ResumeExporter.

Example 1: export resume data from .html file, and export as json:

resume_exporter /path/to/your/profile.html


Example 2: export resume data from .html file, export as json and save to file:

resume_exporter /path/to/your/profile.html >> your_file.json


Example 3: export resume data from .html file, and export as xml

resume_exporter /path/to/your/profile.html --format xml


Example 4: export resume data from .html file, export as xml and save to file

resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'

command :export do |c|
  c.syntax = 'resume_exporter export [options]'
  # The help text previously ended with a stray "test." line; removed.
  c.description = 'export resume data from <file>, and export as json or xml'
  c.global_option '--format FORMAT', 'Specify the output format (default is json). You can choose json, xml, jsonresume, fresh, md, yaml'
  c.example 'export resume data from .html file, and export as json', 'resume_exporter /path/to/your/profile.html'
  c.example 'export resume data from .html file, and export as xml', 'resume_exporter /path/to/your/profile.html --format xml'
  c.example 'export resume data from .html file, export as xml and save to file', 'resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
  c.action do |args, options|
    # The first positional argument is the profile file; abort when missing.
    file = args.shift || abort('file argument required.')

    # Fall back to JSON output when --format was not given.
    options.default :format => "json"

    r = ResumeExporter.new(file)

    # Print the exported document; callers redirect it to a file if needed.
    say r.export(format: options.format)
  end
end

default_command :export
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'tilt/jbuilder.rb'
|
2
|
+
require 'multi_json'
|
3
|
+
MultiJson.use :yajl
|
4
|
+
MultiJson.dump_options = {:pretty=>true}
|
5
|
+
Jbuilder.key_format camelize: :lower
|
6
|
+
|
7
|
+
|
8
|
+
module Exporter
  # Renders resume data as JSON through a Jbuilder template.
  module Json
    class << self
      # Renders the data with "<template>.json.jbuilder" from the templates
      # directory that sits next to this file's directory.
      #
      # options[:template] - template base name (defaults to "default")
      # options[:data]     - resume data; stored in @data on this module,
      #                      which is passed to the template as render scope
      #
      # Returns whatever the Tilt template's #render returns.
      def export(options = {})
        @data = options[:data]
        template_name = options[:template] || "default"
        template_path = File.expand_path("../templates/#{template_name}.json.jbuilder", __dir__)
        Tilt::JbuilderTemplate.new(template_path).render(self)
      end
    end
  end
end
|
data/lib/exporters/md.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'tilt'
|
2
|
+
require 'builder'
|
3
|
+
|
4
|
+
module Exporter
  # Renders resume data as XML through a Builder template.
  module Xml
    # Renders the data with "<template>.xml.builder" from the templates
    # directory that sits next to this file's directory.
    #
    # options[:template] - template base name. Defaults to "default", the
    #                      same default the JSON exporter uses; the previous
    #                      default ("prtflio") has no matching .xml.builder
    #                      template.
    # options[:data]     - resume data; stored in @data on this module,
    #                      which is passed to the template as render scope
    #
    # Returns whatever the Tilt template's #render returns.
    def self.export(options = {})
      template = options[:template] || "default"
      @data = options[:data]
      Tilt.new(File.expand_path("../templates/#{template}.xml.builder", __dir__)).render(self)
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Extractor
  # Common ancestor of the extractors; it only supplies the list of resume
  # section names.
  class Base
    # Returns the section names as an Array of Strings, newly built on every
    # call. Note that "openSource" is camelCase, unlike the other entries.
    def attributes
      %w[
        meta basics employment education projects openSource
        skills qualifications recognition writing reading speaking
        patents languages interests extracurriculars affiliations governance
        service references disposition location samples testimonials
      ]
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "extractors/html"
|
2
|
+
require "extractors/json"
|
3
|
+
|
4
|
+
module Extractor
  # Chooses the extractor class for an input file from its extension.
  module Factory
    # Builds the extractor matching +file_path+.
    #
    # The extension is compared case-insensitively, so "Profile.HTML" is
    # handled the same way as "profile.html".
    #
    # file_path - path to a saved profile (.html / .htm) or resume (.json)
    #
    # Returns an Extractor::Html or Extractor::Json created with file_path.
    # Raises RuntimeError ("File not found") when file_path is not a regular
    # file, and RuntimeError ("File type not supported. ...") for any other
    # extension.
    def self.extractor_for(file_path)
      raise "File not found" unless File.file?(file_path)

      case File.extname(file_path).downcase
      when ".html", ".htm"
        Extractor::Html.new(file_path)
      when ".json"
        Extractor::Json.new(file_path)
      else
        raise "File type not supported. Only .html or .json"
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "extractors/base"
|
3
|
+
require "extractors/html/linkedin"
|
4
|
+
require "extractors/html/stackoverflow"
|
5
|
+
require "extractors/html/xing"
|
6
|
+
|
7
|
+
module Extractor
  # Extractor for saved profile pages. It parses the HTML file, works out
  # which site the page came from by looking at the hosts referenced in its
  # <link> tags, and delegates to the matching site-specific extractor.
  class Html < Base
    # file_path - path of the HTML file; it is parsed as UTF-8 with Nokogiri.
    def initialize(file_path)
      @doc = File.open(file_path) { |f| Nokogiri::HTML(f, nil, 'utf-8') }
    end

    # Collects every section the site-specific extractor implements.
    #
    # Returns a Hash keyed by section name (Symbol), containing only the
    # entries from #attributes that the chosen extractor responds to. The
    # Hash is empty when the page matches none of the known sites.
    def extract
      extractor = site_extractor

      attributes.each_with_object({}) do |attr, hash|
        section = attr.to_sym
        hash[section] = extractor.public_send(section) if extractor.respond_to?(section)
      end
    end

    # True when a <link> of the page points at a licdn.com host.
    def is_linkedin?
      links_to?("licdn.com")
    end

    # True when a <link> of the page points at a xing.com host.
    def is_xing?
      links_to?("xing.com")
    end

    # True when a <link> of the page points at a sstatic.net host.
    def is_stackoverflow?
      links_to?("sstatic.net")
    end

    private

    # Picks the site-specific extractor; the checks run in the order
    # LinkedIn, Xing, Stackoverflow. Returns nil for an unrecognised page.
    def site_extractor
      if is_linkedin?
        Extractor::Linkedin.new(@doc)
      elsif is_xing?
        Extractor::Xing.new(@doc)
      elsif is_stackoverflow?
        Extractor::Stackoverflow.new(@doc)
      end
    end

    # True when any <link> element has an href containing +host+.
    # <link> elements without an href attribute are skipped instead of
    # raising NoMethodError on nil.
    def links_to?(host)
      @doc.css("link").any? { |link| link["href"].to_s.include?(host) }
    end
  end
end
|
@@ -0,0 +1,271 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "uri"
|
3
|
+
|
4
|
+
module Extractor
  # Extracts resume sections from a parsed LinkedIn public-profile page.
  #
  # Every public method returns plain Hashes / Arrays / Strings. Optional
  # values are only added to an entry when the matching element exists in
  # the page, so missing data never shows up as a nil or empty value.
  class Linkedin
    # doc - parsed profile page; it must respond to #at_css and #css
    #       (Extractor::Html passes a Nokogiri document).
    def initialize(doc)
      @doc = doc
    end

    # Returns the personal details: name, label, image, summary and a
    # nested :contact hash with website and location.
    def basics
      {
        name: name,
        label: label,
        image: image,
        summary: summary,
        contact: {
          website: website,
          location: location
        }
      }
    end

    # Returns the full name, or nil when the page has no usable name.
    def name
      parts = [first_name, last_name].compact
      parts.join(' ') unless parts.empty?
    end

    # Returns the first word of the "#name" element, or nil.
    def first_name
      part = name_parts[0]
      part.strip if part
    end

    # Returns everything after the first word of the "#name" element, or
    # nil when the name is missing or consists of a single word.
    def last_name
      part = name_parts[1]
      part.strip if part
    end

    # Returns the headline with runs of whitespace collapsed, or nil.
    def label
      headline = text_at(@doc, '.headline.title')
      headline.gsub(/\s+/, ' ').strip if headline
    end

    # Returns the src of the profile picture, or nil.
    def image
      picture = @doc.at_css('.profile-picture img')
      picture['src'] if picture
    end

    # Returns the text of the ".locality" element (memoized), or nil.
    def location
      @location ||= text_at(@doc, '.locality')
    end

    # Returns the first listed website with LinkedIn's redirect wrapper
    # removed, or nil.
    def website
      link_at(@doc, '.extra-info .websites li a')
    end

    # Returns the summary with every whitespace character replaced by a
    # plain space (runs are not collapsed), or nil.
    def summary
      description = text_at(@doc, '#summary .description')
      description.gsub(/[[:space:]]/, ' ').strip if description
    end

    # Returns { history: [...] } with one entry per "#experience .position".
    def employment
      {
        history: build_entries('#experience .position') do |item, entry|
          put(entry, :position, text_at(item, ".item-title"))
          put(entry, :employer, text_at(item, ".item-subtitle"))
          add_dates(entry, item)
          put(entry, :summary, text_at(item, ".description"))
        end
      }
    end

    # Returns { history: [...] } with one entry per "#education .school".
    def education
      {
        history: build_entries('#education .school') do |item, entry|
          put(entry, :institution, text_at(item, ".item-title"))

          subtitle = text_at(item, ".item-subtitle span")
          parts = subtitle.split(", ") if subtitle
          if parts && parts.length == 3
            entry[:degree], entry[:fieldOfStudy], entry[:grade] = parts
          elsif parts
            # NOTE(review): this stores the split Array, not a String, exactly
            # as before; confirm what the templates expect here.
            entry[:fieldOfStudy] = parts
          end

          # The last paragraph that is not the "Activities and Societies"
          # line becomes the summary.
          item.css(".description p").each do |paragraph|
            entry[:summary] = paragraph.text unless paragraph.text.include?("Activities and Societies: ")
          end

          add_dates(entry, item)
        end
      }
    end

    # Returns { history: [...] } with one entry per "#projects .project".
    def projects
      {
        history: build_entries('#projects .project') do |item, entry|
          put(entry, :title, text_at(item, ".item-title"))
          put(entry, :url, link_at(item, ".item-title a"))
          add_dates(entry, item)
          put(entry, :description, text_at(item, ".description"))
        end
      }
    end

    # Returns { sets: [{ name: ... }, ...] } from "#skills .skill a".
    def skills
      { sets: @doc.css('#skills .skill a').map { |item| { name: item.text } } }
    end

    # Returns { history: [...] } listing certifications, then courses, then
    # test scores; each entry carries a :category naming its kind.
    def qualifications
      certifications = build_entries('#certifications .certification') do |item, entry|
        entry[:category] = "Certification"
        put(entry, :title, text_at(item, ".item-title"))
        put(entry, :from, text_at(item, ".item-subtitle"))
        put(entry, :url, link_at(item, ".item-title a"))
        add_dates(entry, item)
      end

      courses = build_entries('#courses .course') do |item, entry|
        entry[:category] = "Course"
        put(entry, :title, text_at(item, "span"))
      end

      scores = build_entries('#scores .score') do |item, entry|
        entry[:category] = "Test Score"
        put(entry, :title, text_at(item, ".item-title"))
        # score[:score] = item.at_css(".item-subtitle").text.gsub(/[^0-9]/, "") if item.at_css(".item-subtitle")
        add_dates(entry, item)
        put(entry, :summary, text_at(item, ".description"))
      end

      { history: certifications + courses + scores }
    end

    # Returns { history: [...] } with one "Award" entry per "#awards .award".
    def recognition
      {
        history: build_entries('#awards .award') do |item, entry|
          entry[:category] = "Award"
          put(entry, :title, text_at(item, ".item-title"))
          put(entry, :from, text_at(item, ".item-subtitle"))
          add_dates(entry, item)
          put(entry, :summary, text_at(item, ".description"))
        end
      }
    end

    # Returns { history: [...] } with one entry per publication.
    def writing
      {
        history: build_entries('#publications .publication') do |item, entry|
          put(entry, :title, text_at(item, ".item-title"))
          put(entry, :url, link_at(item, ".item-title a"))
          put(entry, :publisher, text_at(item, ".item-subtitle"))
          # A node set is always truthy, so check for emptiness explicitly.
          times = item.css(".date-range time")
          entry[:date] = times.text unless times.empty?
          put(entry, :summary, text_at(item, ".description"))
          # publication[:authors] = item.css(".contributors .contributor").map { |c| c.text }.join("") if item.at_css(".contributors .contributor")
        end
      }
    end

    # Returns { history: [...] } with one entry per "#patents .patent".
    def patents
      {
        history: build_entries('#patents .patent') do |item, entry|
          put(entry, :title, text_at(item, ".item-title"))
          put(entry, :url, link_at(item, ".item-title a"))
          put(entry, :date, text_at(item, ".date-range time"))
          range = item.at_css(".date-range")
          if range
            # The status is whatever text sits in the date range outside the
            # <time> element.
            entry[:status] = range.children.reject { |child| child.name == "time" }.map(&:text).join.strip
          end
          put(entry, :description, text_at(item, ".description"))
        end
      }
    end

    # Returns { list: [...] } with :language and :level per language.
    def languages
      {
        list: build_entries('#languages .language') do |item, entry|
          put(entry, :language, text_at(item, ".name"))
          put(entry, :level, text_at(item, ".proficiency"))
        end
      }
    end

    # Returns { list: [{ name: ... }, ...] } from "#interests .interest span".
    def interests
      { list: @doc.css('#interests .interest span').map { |item| { name: item.text } } }
    end

    # Returns { history: [...] } with one entry per "#organizations li".
    def affiliations
      {
        history: build_entries('#organizations li') do |item, entry|
          put(entry, :organization, text_at(item, ".item-title"))
          role = text_at(item, ".item-subtitle")
          entry[:roles] = [role] if role
          add_dates(entry, item)
          put(entry, :summary, text_at(item, ".description"))
        end
      }
    end

    # Returns { history: [...] } with one "Volunteer Work" entry per
    # "#volunteering .position".
    def service
      {
        history: build_entries('#volunteering .position') do |item, entry|
          entry[:category] = "Volunteer Work"
          role = text_at(item, ".item-title")
          entry[:roles] = [role] if role
          put(entry, :organization, text_at(item, ".item-subtitle"))
          # volunteering[:cause] = item.at_css(".cause").text if item.at_css(".cause")
          add_dates(entry, item)
          put(entry, :summary, text_at(item, ".description"))
        end
      }
    end

    private

    # Splits the "#name" text into [first word, remainder]; returns an
    # empty Array when the element is missing or blank.
    def name_parts
      node = @doc.at_css('#name')
      node ? node.text.strip.split(' ', 2) : []
    end

    # Maps every element matching +selector+ to a Hash that the block
    # fills in; the block receives the element and the new Hash.
    def build_entries(selector)
      @doc.css(selector).map do |item|
        entry = {}
        yield item, entry
        entry
      end
    end

    # Text of the first element under +node+ matching +selector+, or nil.
    def text_at(node, selector)
      found = node.at_css(selector)
      found.text if found
    end

    # Cleaned-up href of the first anchor under +node+ matching +selector+,
    # or nil.
    def link_at(node, selector)
      anchor = node.at_css(selector)
      clean_up_linkedin_redirect_url(anchor["href"]) if anchor
    end

    # Stores +value+ under +key+ unless it is nil.
    def put(entry, key, value)
      entry[key] = value unless value.nil?
    end

    # Adds :startDate / :endDate from the first two <time> elements of the
    # item's date range, when present.
    def add_dates(entry, item)
      dates = item.css(".date-range time")
      entry[:startDate] = dates[0].text if dates[0]
      entry[:endDate] = dates[1].text if dates[1]
    end

    # LinkedIn wraps outgoing links as ".../redir/redirect?url=<encoded>&...".
    # Returns the decoded target of such a link. Any other link, and any
    # redirect link whose target cannot be determined (no query, no "url"
    # parameter, unparsable link, malformed %-encoding), is returned as is.
    #
    # URI.unescape no longer exists on Ruby 3.0+, so the query string is
    # parsed with URI.decode_www_form, which also picks the parameter named
    # exactly "url" instead of any parameter that contains "url=".
    def clean_up_linkedin_redirect_url(link)
      return link unless link && link.include?("/redir/redirect")

      query = URI(link).query
      return link unless query

      pair = URI.decode_www_form(query).assoc("url")
      target = pair && pair.last
      (target.nil? || target.empty?) ? link : target
    rescue URI::InvalidURIError, ArgumentError
      link
    end
  end
end
|