resume_exporter 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e03168b0fb8cdb1b97b119e5cc6571d6013d5680
4
+ data.tar.gz: 360d4ae39463d516374a586c06b376d60d1fed9c
5
+ SHA512:
6
+ metadata.gz: 871738aaa534f937a70ec9c46440615e1b67b56a02a948ced09d5232bf63e80e0da17336590fe11bfd1e7f583c02f794643b294caa65dd2d764cb6956e7097cb
7
+ data.tar.gz: c891419756f962ab6386f0a850a4ea895cb084f4b84f5fe7fe9ae65445a19f01746f1c9694a77fefac0fd00e41b4180840d9fc8209dd66bdf2e5c2f90a62f5ee
@@ -0,0 +1,47 @@
1
+ require 'rubygems'
2
+ require 'commander/import'
3
+ require 'resume_exporter'
4
+
5
# Program metadata for the command-line tool. The description is one
# multi-line string containing four usage examples.
+ program :version, '0.0.1'
6
+ program :description, 'ResumeExporter is a tool to export data from public profile html files.
7
+ Save your profile (e.g. from LinkedIn, Xing, or Stackoverflow) as html and export to json or xml with the help of ResumeExporter.
8
+
9
+ Example 1: export resume data from .html file, and export as json:
10
+
11
+ resume_exporter /path/to/your/profile.html
12
+
13
+
14
+ Example 2: export resume data from .html file, export as json and save to file:
15
+
16
+ resume_exporter /path/to/your/profile.html >> your_file.json
17
+
18
+
19
+ Example 3: export resume data from .html file, and export as xml
20
+
21
+ resume_exporter /path/to/your/profile.html --format xml
22
+
23
+
24
+ Example 4: export resume data from .html file, export as xml and save to file
25
+
26
+ resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
27
+
28
# The `export` command: takes the input file as its first positional
# argument and prints the exported resume in the requested format.
+ command :export do |c|
29
+ c.syntax = 'resume_exporter export [options]'
30
# NOTE(review): the description string below ends with a line reading
# "test." -- this looks like leftover text; confirm before release.
+ c.description = 'export resume data from <file>, and export as json or xml
31
+ test.'
32
+ c.global_option '--format FORMAT', 'Specify the output format (default is json). You can choose json, xml, jsonresume, fresh, md, yaml'
33
+ c.example 'export resume data from .html file, and export as json', 'resume_exporter /path/to/your/profile.html'
34
+ c.example 'export resume data from .html file, and export as xml', 'resume_exporter /path/to/your/profile.html --format xml'
35
+ c.example 'export resume data from .html file, export as xml and save to file', 'resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
36
# Action body: abort when no file argument was given, fall back to the
# "json" format when --format is absent, then print the export result.
+ c.action do |args, options|
37
+ file = args.shift || abort('file argument required.')
38
+
39
+ options.default :format => "json"
40
+
41
+ r = ResumeExporter.new(file)
42
+
43
+ say r.export(format: options.format)
44
+ end
45
+ end
46
+
47
# The usage examples above invoke the tool without naming a command, so
# `export` is registered as the default command.
+ default_command :export
@@ -0,0 +1,16 @@
1
+ require 'tilt/jbuilder.rb'
2
+ require 'multi_json'
3
+ MultiJson.use :yajl
4
+ MultiJson.dump_options = {:pretty=>true}
5
+ Jbuilder.key_format camelize: :lower
6
+
7
+
8
module Exporter
  # JSON exporter: renders a Jbuilder template from the sibling
  # `templates` directory.
  module Json
    class << self
      # Renders resume data through `<template>.json.jbuilder`.
      #
      # options[:data]     - resume data; stored in @data on this module,
      #                      which is also passed to the template as its
      #                      rendering scope (presumably how the template
      #                      reads the data -- templates are not shown here).
      # options[:template] - template base name; "default" when omitted.
      #
      # Returns whatever the template's `render` returns.
      def export(options = {})
        @data = options[:data]
        template_name = options[:template] || "default"
        template_path = File.expand_path("../templates/#{template_name}.json.jbuilder", __dir__)

        Tilt::JbuilderTemplate.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'tilt'
2
+ require 'erb'
3
+
4
module Exporter
  # Markdown exporter: renders `templates/default.md.erb`.
  module Md
    class << self
      # Renders resume data through the Markdown ERB template.
      #
      # options[:data] - resume data; stored in @data on this module, which
      #                  is also passed to the template as its rendering
      #                  scope (presumably how the template reads the data).
      #
      # Returns whatever the template's `render` returns.
      def export(options = {})
        @data = options[:data]
        template_path = File.expand_path("../templates/default.md.erb", __dir__)

        Tilt.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'tilt'
2
+ require 'erb'
3
+
4
module Exporter
  # Plain-text exporter: renders `templates/default.txt.erb`.
  module Txt
    class << self
      # Renders resume data through the plain-text ERB template.
      #
      # options[:data] - resume data; stored in @data on this module, which
      #                  is also passed to the template as its rendering
      #                  scope (presumably how the template reads the data).
      #
      # Returns whatever the template's `render` returns.
      def export(options = {})
        @data = options[:data]
        template_path = File.expand_path("../templates/default.txt.erb", __dir__)

        Tilt.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'tilt'
2
+ require 'builder'
3
+
4
module Exporter
  # XML exporter: renders a Builder template from the sibling `templates`
  # directory.
  module Xml
    class << self
      # Renders resume data through `<template>.xml.builder`.
      #
      # options[:data]     - resume data; stored in @data on this module,
      #                      which is also passed to the template as its
      #                      rendering scope (presumably how the template
      #                      reads the data).
      # options[:template] - template base name; "prtflio" when omitted.
      #
      # Returns whatever the template's `render` returns.
      def export(options = {})
        @data = options[:data]
        template_name = options[:template] || "prtflio"
        template_path = File.expand_path("../templates/#{template_name}.xml.builder", __dir__)

        Tilt.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
+ require 'yaml'
2
+
3
module Exporter
  # YAML exporter: serializes the resume data directly, without a template.
  module Yaml
    class << self
      # Serializes options[:data] with `to_yaml`.
      #
      # The data is also kept in @data on this module, as the other
      # exporters do.
      #
      # Returns the YAML document as a String.
      def export(options = {})
        @data = options[:data]
        @data.to_yaml
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Extractor
  # Common base for extractors; it only defines the list of resume
  # section names.
  class Base
    # Returns a fresh Array of section names (Strings), in output order.
    def attributes
      %w[
        meta basics employment education projects openSource
        skills qualifications recognition writing reading speaking
        patents languages interests extracurriculars affiliations governance
        service references disposition location samples testimonials
      ]
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require "extractors/html"
2
+ require "extractors/json"
3
+
4
module Extractor
  # Picks the extractor class that matches an input file's extension.
  module Factory
    # Builds an extractor for the file at +file_path+.
    #
    # The extension is compared case-insensitively, so "PROFILE.HTML" is
    # handled the same way as "profile.html".
    #
    # Returns an Extractor::Html for .html/.htm files and an
    # Extractor::Json for .json files.
    # Raises RuntimeError "File not found" when the path is not a regular
    # file, and RuntimeError "File type not supported. Only .html or .json"
    # for any other extension.
    def self.extractor_for(file_path)
      raise "File not found" unless File.file?(file_path)

      case File.extname(file_path).downcase
      when ".html", ".htm"
        Extractor::Html.new(file_path)
      when ".json"
        Extractor::Json.new(file_path)
      else
        raise "File type not supported. Only .html or .json"
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require "nokogiri"
2
+ require "extractors/base"
3
+ require "extractors/html/linkedin"
4
+ require "extractors/html/stackoverflow"
5
+ require "extractors/html/xing"
6
+
7
module Extractor
  # Extractor for saved profile pages. It detects which site the page came
  # from by looking at the hosts referenced in its <link> tags and
  # delegates to the matching site-specific extractor.
  class Html < Base
    # Parses the file at +file_path+ as UTF-8 HTML. The file handle is
    # closed as soon as parsing finishes.
    def initialize(file_path)
      @doc = File.open(file_path) { |f| Nokogiri::HTML(f, nil, 'utf-8') }
    end

    # Collects every section listed in +attributes+ that the site-specific
    # extractor implements.
    #
    # Returns a Hash keyed by section name (Symbol). The hash is empty when
    # the page matches none of the known sites.
    def extract
      extractor = site_extractor

      attributes.each_with_object({}) do |attr, sections|
        key = attr.to_sym
        sections[key] = extractor.send(key) if extractor.respond_to?(key)
      end
    end

    # True when a <link> tag references LinkedIn's asset host.
    def is_linkedin?
      links_to?("licdn.com")
    end

    # True when a <link> tag references xing.com.
    def is_xing?
      links_to?("xing.com")
    end

    # True when a <link> tag references Stack Overflow's asset host.
    def is_stackoverflow?
      links_to?("sstatic.net")
    end

    private

    # Chooses the extractor for the detected site, checking LinkedIn, then
    # Xing, then Stack Overflow. Returns nil when none matches.
    def site_extractor
      if is_linkedin?
        Extractor::Linkedin.new(@doc)
      elsif is_xing?
        Extractor::Xing.new(@doc)
      elsif is_stackoverflow?
        Extractor::Stackoverflow.new(@doc)
      end
    end

    # True when any <link> element's href contains +host+.
    # `to_s` covers <link> tags without an href attribute, for which the
    # attribute lookup returns nil.
    def links_to?(host)
      @doc.css("link").any? { |link| link["href"].to_s.include?(host) }
    end
  end
end
@@ -0,0 +1,271 @@
1
+ require "nokogiri"
2
+ require "uri"
3
+
4
module Extractor
  # Builds resume sections from a parsed LinkedIn public-profile page.
  #
  # The document is queried only through `css` and `at_css`. Inside a
  # section entry a key is present only when the matching element was
  # found (and, for links, has an href); missing elements never raise.
  class Linkedin
    # doc - the parsed profile page.
    def initialize(doc)
      @doc = doc
    end

    # Returns the headline data: name, label, image, summary and contact.
    # Values are nil when the corresponding element is missing.
    def basics
      {
        name: name,
        label: label,
        image: image,
        summary: summary,
        contact: {
          website: website,
          location: location
        }
      }
    end

    # Full name, or nil when no name is present. A single-word name is
    # returned as is, without a trailing space.
    def name
      parts = [first_name, last_name].compact
      parts.join(' ') unless parts.empty?
    end

    # First whitespace-separated word of the #name element, or nil.
    def first_name
      name_parts[0]
    end

    # Everything after the first word of the #name element, or nil when
    # the name has only one word.
    def last_name
      name_parts[1]
    end

    # Headline with runs of whitespace collapsed to single spaces, or nil.
    def label
      text = text_at(@doc, '.headline.title')
      text.gsub(/\s+/, ' ').strip if text
    end

    # `src` of the profile picture, or nil.
    def image
      attribute_at(@doc, '.profile-picture img', 'src')
    end

    # Locality text (memoized), or nil.
    def location
      @location ||= text_at(@doc, '.locality')
    end

    # First listed website with LinkedIn's redirect wrapper removed, or nil.
    def website
      clean_up_linkedin_redirect_url(attribute_at(@doc, '.extra-info .websites li a', 'href'))
    end

    # Profile summary with every whitespace character replaced by a
    # space, or nil.
    def summary
      text = text_at(@doc, '#summary .description')
      text.gsub(/[[:space:]]/, ' ').strip if text
    end

    # Returns { history: [...] } with one entry per listed position.
    def employment
      history = @doc.css('#experience .position').map do |item|
        start_date, end_date = date_range(item)
        prune(
          position: text_at(item, '.item-title'),
          employer: text_at(item, '.item-subtitle'),
          startDate: start_date,
          endDate: end_date,
          summary: text_at(item, '.description')
        )
      end

      { history: history }
    end

    # Returns { history: [...] } with one entry per listed school.
    def education
      history = @doc.css('#education .school').map do |item|
        start_date, end_date = date_range(item)
        entry = { institution: text_at(item, '.item-title') }
        entry = entry.merge(study_details(text_at(item, '.item-subtitle span')))
        prune(
          entry.merge(
            summary: education_summary(item),
            startDate: start_date,
            endDate: end_date
          )
        )
      end

      { history: history }
    end

    # Returns { history: [...] } with one entry per listed project.
    def projects
      history = @doc.css('#projects .project').map do |item|
        start_date, end_date = date_range(item)
        prune(
          title: text_at(item, '.item-title'),
          url: link_at(item, '.item-title a'),
          startDate: start_date,
          endDate: end_date,
          description: text_at(item, '.description')
        )
      end

      { history: history }
    end

    # Returns { sets: [{ name: ... }, ...] }, one entry per skill link.
    def skills
      { sets: @doc.css('#skills .skill a').map { |item| { name: item.text } } }
    end

    # Returns { history: [...] } listing certifications, then courses,
    # then test scores; each entry is tagged with a :category.
    def qualifications
      { history: certifications + courses + test_scores }
    end

    # Returns { history: [...] } with one entry per award.
    def recognition
      history = @doc.css('#awards .award').map do |item|
        start_date, end_date = date_range(item)
        prune(
          category: "Award",
          title: text_at(item, '.item-title'),
          from: text_at(item, '.item-subtitle'),
          startDate: start_date,
          endDate: end_date,
          summary: text_at(item, '.description')
        )
      end

      { history: history }
    end

    # Returns { history: [...] } with one entry per publication.
    def writing
      history = @doc.css('#publications .publication').map do |item|
        prune(
          title: text_at(item, '.item-title'),
          url: link_at(item, '.item-title a'),
          publisher: text_at(item, '.item-subtitle'),
          # `css` always returns a (possibly empty) node set, so :date is
          # always present and is "" when the entry has no <time> element.
          date: item.css('.date-range time').text,
          summary: text_at(item, '.description')
        )
      end

      { history: history }
    end

    # Returns { history: [...] } with one entry per patent.
    def patents
      history = @doc.css('#patents .patent').map do |item|
        range = item.at_css('.date-range')
        # The status is whatever text sits in .date-range outside <time>.
        status = range.children.reject { |child| child.name == "time" }.map(&:text).join.strip if range
        prune(
          title: text_at(item, '.item-title'),
          url: link_at(item, '.item-title a'),
          date: text_at(item, '.date-range time'),
          status: status,
          description: text_at(item, '.description')
        )
      end

      { history: history }
    end

    # Returns { list: [...] } with one entry per language.
    def languages
      list = @doc.css('#languages .language').map do |item|
        prune(
          language: text_at(item, '.name'),
          level: text_at(item, '.proficiency')
        )
      end

      { list: list }
    end

    # Returns { list: [{ name: ... }, ...] }, one entry per interest.
    def interests
      { list: @doc.css('#interests .interest span').map { |item| { name: item.text } } }
    end

    # Returns { history: [...] } with one entry per organization.
    def affiliations
      history = @doc.css('#organizations li').map do |item|
        start_date, end_date = date_range(item)
        role = text_at(item, '.item-subtitle')
        prune(
          organization: text_at(item, '.item-title'),
          roles: (role && [role]),
          startDate: start_date,
          endDate: end_date,
          summary: text_at(item, '.description')
        )
      end

      { history: history }
    end

    # Returns { history: [...] } with one entry per volunteering position.
    def service
      history = @doc.css('#volunteering .position').map do |item|
        start_date, end_date = date_range(item)
        role = text_at(item, '.item-title')
        prune(
          category: "Volunteer Work",
          roles: (role && [role]),
          organization: text_at(item, '.item-subtitle'),
          startDate: start_date,
          endDate: end_date,
          summary: text_at(item, '.description')
        )
      end

      { history: history }
    end

    private

    # Unwraps LinkedIn's "/redir/redirect?url=<encoded target>" links.
    #
    # Returns the decoded value of the `url` query parameter. The link is
    # returned unchanged when it is not a redirect link, has no `url`
    # parameter, or cannot be parsed. Returns nil for a nil link.
    #
    # The query is parsed with URI.decode_www_form so that only a
    # parameter named exactly `url` is used; URI.unescape, used
    # previously, is not available on Ruby 3.0+.
    def clean_up_linkedin_redirect_url(link)
      return link unless link && link.include?("/redir/redirect")

      query = URI(link).query
      return link unless query

      pair = URI.decode_www_form(query).assoc("url")
      pair ? pair.last : link
    rescue URI::InvalidURIError, ArgumentError
      # Malformed URL or broken percent-encoding: keep the original link.
      link
    end

    # The #name text split into [first word, rest]; [] when there is no
    # #name element or it is blank.
    def name_parts
      node = @doc.at_css('#name')
      node ? node.text.strip.split(' ', 2).map(&:strip) : []
    end

    # Text of the first element matching +selector+ under +node+, or nil.
    def text_at(node, selector)
      found = node.at_css(selector)
      found.text if found
    end

    # Value of +attribute+ on the first element matching +selector+ under
    # +node+, or nil when the element or the attribute is missing.
    def attribute_at(node, selector, attribute)
      found = node.at_css(selector)
      found[attribute] if found
    end

    # href of the first element matching +selector+, with LinkedIn's
    # redirect wrapper removed; nil when there is no such link.
    def link_at(node, selector)
      clean_up_linkedin_redirect_url(attribute_at(node, selector, 'href'))
    end

    # [start, end] texts of the first two <time> elements inside the
    # item's .date-range; each is nil when absent.
    def date_range(item)
      times = item.css(".date-range time")
      [times[0], times[1]].map { |time| time.text if time }
    end

    # Copy of +hash+ without its nil-valued keys (key order preserved).
    def prune(hash)
      hash.reject { |_key, value| value.nil? }
    end

    # Splits a school subtitle of the form "degree, field, grade".
    #
    # NOTE(review): when the subtitle does not have exactly three parts,
    # :fieldOfStudy receives the Array of parts rather than a String.
    # This is kept as is because consumers may rely on it -- confirm.
    def study_details(subtitle)
      return {} unless subtitle

      parts = subtitle.split(", ")
      if parts.length == 3
        { degree: parts[0], fieldOfStudy: parts[1], grade: parts[2] }
      else
        { fieldOfStudy: parts }
      end
    end

    # Text of the last description paragraph that is not the
    # "Activities and Societies" line, or nil.
    def education_summary(item)
      item.css(".description p")
          .map(&:text)
          .reject { |text| text.include?("Activities and Societies: ") }
          .last
    end

    # One entry per listed certification.
    def certifications
      @doc.css('#certifications .certification').map do |item|
        start_date, end_date = date_range(item)
        prune(
          category: "Certification",
          title: text_at(item, '.item-title'),
          from: text_at(item, '.item-subtitle'),
          url: link_at(item, '.item-title a'),
          startDate: start_date,
          endDate: end_date
        )
      end
    end

    # One entry per listed course. The title is omitted, instead of
    # raising, when the course has no <span>.
    def courses
      @doc.css('#courses .course').map do |item|
        prune(category: "Course", title: text_at(item, 'span'))
      end
    end

    # One entry per listed test score.
    def test_scores
      @doc.css('#scores .score').map do |item|
        start_date, end_date = date_range(item)
        prune(
          category: "Test Score",
          title: text_at(item, '.item-title'),
          startDate: start_date,
          endDate: end_date,
          summary: text_at(item, '.description')
        )
      end
    end
  end
end