resume_exporter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e03168b0fb8cdb1b97b119e5cc6571d6013d5680
4
+ data.tar.gz: 360d4ae39463d516374a586c06b376d60d1fed9c
5
+ SHA512:
6
+ metadata.gz: 871738aaa534f937a70ec9c46440615e1b67b56a02a948ced09d5232bf63e80e0da17336590fe11bfd1e7f583c02f794643b294caa65dd2d764cb6956e7097cb
7
+ data.tar.gz: c891419756f962ab6386f0a850a4ea895cb084f4b84f5fe7fe9ae65445a19f01746f1c9694a77fefac0fd00e41b4180840d9fc8209dd66bdf2e5c2f90a62f5ee
@@ -0,0 +1,47 @@
1
+ require 'rubygems'
2
+ require 'commander/import'
3
+ require 'resume_exporter'
4
+
5
+ program :version, '0.0.1'
6
+ program :description, 'ResumeExporter is a tool to export data from public profile html files.
7
+ Save your profile (e.g. from LinkedIn, Xing, or Stackoverflow) as html and export to json or xml with the help of ResumeExporter.
8
+
9
+ Example 1: export resume data from .html file, and export as json:
10
+
11
+ resume_exporter /path/to/your/profile.html
12
+
13
+
14
+ Example 2: export resume data from .html file, export as json and save to file:
15
+
16
+ resume_exporter /path/to/your/profile.html >> your_file.json
17
+
18
+
19
+ Example 3: export resume data from .html file, and export as xml
20
+
21
+ resume_exporter /path/to/your/profile.html --format xml
22
+
23
+
24
+ Example 4: export resume data from .html file, export as xml and save to file
25
+
26
+ resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
27
+
28
# `export` command: reads a saved profile file and prints the exported resume.
#
# The first positional argument is the path of the file to read; the process
# aborts when it is missing. The output format comes from `--format` and
# falls back to "json".
command :export do |c|
  c.syntax = 'resume_exporter export [options]'
  # The description previously ended with a stray "test." line that showed up
  # in the generated help output.
  c.description = 'export resume data from <file>, and export as json or xml'
  c.global_option '--format FORMAT', 'Specify the output format (default is json). You can choose json, xml, jsonresume, fresh, md, yaml'
  c.example 'export resume data from .html file, and export as json', 'resume_exporter /path/to/your/profile.html'
  c.example 'export resume data from .html file, and export as xml', 'resume_exporter /path/to/your/profile.html --format xml'
  c.example 'export resume data from .html file, export as xml and save to file', 'resume_exporter /path/to/your/profile.html --format xml >> your_file.xml'
  c.action do |args, options|
    file = args.shift || abort('file argument required.')

    options.default format: "json"

    exporter = ResumeExporter.new(file)

    say exporter.export(format: options.format)
  end
end
46
+
47
+ default_command :export
@@ -0,0 +1,16 @@
1
+ require 'tilt/jbuilder.rb'
2
+ require 'multi_json'
3
+ MultiJson.use :yajl
4
+ MultiJson.dump_options = {:pretty=>true}
5
+ Jbuilder.key_format camelize: :lower
6
+
7
+
8
module Exporter
  # Renders resume data as JSON through a Jbuilder template.
  module Json
    class << self
      # Renders `options[:data]` with the Jbuilder template named by
      # `options[:template]` ("default" when absent).
      #
      # The data is stored in `@data` on this module and the module itself is
      # passed to Tilt as the render scope (presumably so the template can
      # read `@data` -- the templates are not visible here).
      #
      # @param options [Hash] recognised keys: :data and :template
      # @return the result of rendering the template
      def export(options = {})
        @data = options[:data]
        template_name = options[:template] || "default"
        template_path = File.expand_path("../templates/#{template_name}.json.jbuilder", __dir__)
        Tilt::JbuilderTemplate.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'tilt'
2
+ require 'erb'
3
+
4
module Exporter
  # Renders resume data as Markdown through an ERB template.
  module Md
    class << self
      # Renders `options[:data]` with the bundled `default.md.erb` template.
      #
      # The data is stored in `@data` on this module, and the module itself
      # is the render scope handed to Tilt.
      #
      # @param options [Hash] recognised key: :data
      # @return the result of rendering the template
      def export(options = {})
        @data = options[:data]
        template_path = File.expand_path("../templates/default.md.erb", __dir__)
        Tilt.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'tilt'
2
+ require 'erb'
3
+
4
module Exporter
  # Renders resume data as plain text through an ERB template.
  module Txt
    class << self
      # Renders `options[:data]` with the bundled `default.txt.erb` template.
      #
      # The data is stored in `@data` on this module, and the module itself
      # is the render scope handed to Tilt.
      #
      # @param options [Hash] recognised key: :data
      # @return the result of rendering the template
      def export(options = {})
        @data = options[:data]
        template_path = File.expand_path("../templates/default.txt.erb", __dir__)
        Tilt.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'tilt'
2
+ require 'builder'
3
+
4
module Exporter
  # Renders resume data as XML through a Builder template.
  module Xml
    class << self
      # Renders `options[:data]` with the Builder template named by
      # `options[:template]` ("prtflio" when absent).
      #
      # The data is stored in `@data` on this module, and the module itself
      # is the render scope handed to Tilt.
      #
      # @param options [Hash] recognised keys: :data and :template
      # @return the result of rendering the template
      def export(options = {})
        @data = options[:data]
        template_name = options[:template] || "prtflio"
        template_path = File.expand_path("../templates/#{template_name}.xml.builder", __dir__)
        Tilt.new(template_path).render(self)
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
+ require 'yaml'
2
+
3
module Exporter
  # Serialises resume data as YAML.
  module Yaml
    class << self
      # Stores `options[:data]` in `@data` on this module and returns its
      # YAML representation.
      #
      # @param options [Hash] recognised key: :data
      # @return [String] the YAML document produced by `to_yaml`
      def export(options = {})
        @data = options[:data]
        @data.to_yaml
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Extractor
  # Common parent for extractors; it only declares the list of resume
  # sections an extractor may provide.
  class Base
    # Names of the resume sections, in output order.
    #
    # @return [Array<String>] a new array on every call
    def attributes
      %w[
        meta basics employment education projects openSource
        skills qualifications recognition writing reading speaking
        patents languages interests extracurriculars affiliations governance
        service references disposition location samples testimonials
      ]
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require "extractors/html"
2
+ require "extractors/json"
3
+
4
module Extractor
  # Chooses the extractor class that matches a file's extension.
  module Factory
    # Builds the extractor for `file_path`.
    #
    # The extension is compared case-insensitively, so `profile.HTML` is
    # handled the same way as `profile.html`.
    #
    # @param file_path [String] path to an existing .html, .htm or .json file
    # @return [Extractor::Html, Extractor::Json]
    # @raise [RuntimeError] "File not found" when the path is not a regular
    #   file; "File type not supported. Only .html or .json" for any other
    #   extension
    def self.extractor_for(file_path)
      raise "File not found" unless File.file?(file_path)

      case File.extname(file_path).downcase
      when ".html", ".htm"
        Extractor::Html.new(file_path)
      when ".json"
        Extractor::Json.new(file_path)
      else
        raise "File type not supported. Only .html or .json"
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require "nokogiri"
2
+ require "extractors/base"
3
+ require "extractors/html/linkedin"
4
+ require "extractors/html/stackoverflow"
5
+ require "extractors/html/xing"
6
+
7
module Extractor
  # Parses a saved profile page and delegates extraction to the
  # site-specific extractor that matches the page.
  class Html < Base
    # @param file_path [String] path of the HTML file; it is parsed as UTF-8
    def initialize(file_path)
      @doc = File.open(file_path) { |f| Nokogiri::HTML(f, nil, 'utf-8') }
    end

    # Collects every section from `attributes` that the selected extractor
    # responds to.
    #
    # @return [Hash{Symbol=>Object}] section name => extracted value; empty
    #   when no known site is detected
    def extract
      extractor = site_extractor

      attributes.each_with_object({}) do |attr, sections|
        key = attr.to_sym
        sections[key] = extractor.public_send(key) if extractor.respond_to?(key)
      end
    end

    # @return [Boolean] true when a <link> href mentions "licdn.com"
    def is_linkedin?
      links_to?("licdn.com")
    end

    # @return [Boolean] true when a <link> href mentions "xing.com"
    def is_xing?
      links_to?("xing.com")
    end

    # @return [Boolean] true when a <link> href mentions "sstatic.net"
    def is_stackoverflow?
      links_to?("sstatic.net")
    end

    private

    # Picks the extractor for the detected site, checking LinkedIn, Xing and
    # Stackoverflow in that order; nil when none matches.
    def site_extractor
      if is_linkedin?
        Extractor::Linkedin.new(@doc)
      elsif is_xing?
        Extractor::Xing.new(@doc)
      elsif is_stackoverflow?
        Extractor::Stackoverflow.new(@doc)
      end
    end

    # True when any <link> element's href contains `host`. `to_s` guards
    # against <link> elements without an href attribute, whose lookup
    # yields nil.
    def links_to?(host)
      @doc.css("link").any? { |link| link["href"].to_s.include?(host) }
    end
  end
end
@@ -0,0 +1,271 @@
1
+ require "nokogiri"
2
+ require "uri"
3
+
4
module Extractor
  # Extracts resume sections from a parsed LinkedIn profile document.
  #
  # The document only has to answer `at_css` and `css`; elements it returns
  # must answer `text`, `[]`, `css`, `at_css` and (for patents) `children`.
  # Every section method returns plain hashes and arrays. A key is only
  # present when its element was found in the document.
  class Linkedin
    # @param doc parsed profile document queried through `at_css` / `css`
    def initialize(doc)
      @doc = doc
    end

    # @return [Hash] name, label, image, summary and contact details
    def basics
      {
        name: name,
        label: label,
        image: image,
        summary: summary,
        contact: {
          website: website,
          location: location
        }
      }
    end

    # @return [String, nil] first and last name joined by one space; a
    #   single-word name is returned without trailing whitespace
    def name
      full_name = [first_name, last_name].compact.join(' ')
      full_name unless full_name.empty?
    end

    # @return [String, nil] first whitespace-separated word of `#name`
    def first_name
      name_parts[0]
    end

    # @return [String, nil] everything after the first word of `#name`;
    #   nil when the name consists of a single word
    def last_name
      name_parts[1]
    end

    # @return [String, nil] headline with runs of whitespace collapsed
    def label
      headline = text_at(@doc, '.headline.title')
      headline.gsub(/\s+/, ' ').strip if headline
    end

    # @return [String, nil] `src` of the profile picture
    def image
      picture = @doc.at_css('.profile-picture img')
      picture['src'] if picture
    end

    # @return [String, nil] text of `.locality` (memoised once found)
    def location
      @location ||= text_at(@doc, '.locality')
    end

    # @return [String, nil] first listed website with LinkedIn's redirect
    #   wrapper removed
    def website
      anchor = @doc.at_css('.extra-info .websites li a')
      clean_up_linkedin_redirect_url(anchor['href']) if anchor
    end

    # @return [String, nil] summary text with each whitespace character
    #   replaced by a space
    def summary
      description = text_at(@doc, '#summary .description')
      description.gsub(/[[:space:]]/, ' ').strip if description
    end

    # @return [Hash] `{ history: [...] }` built from `#experience .position`
    def employment
      positions = @doc.css('#experience .position').map do |item|
        entry = {}
        put(entry, :position, text_at(item, ".item-title"))
        put(entry, :employer, text_at(item, ".item-subtitle"))
        put_date_range(entry, item)
        put(entry, :summary, text_at(item, ".description"))
        entry
      end

      { history: positions }
    end

    # @return [Hash] `{ history: [...] }` built from `#education .school`
    def education
      schools = @doc.css('#education .school').map do |item|
        entry = {}
        put(entry, :institution, text_at(item, ".item-title"))

        subtitle_text = text_at(item, ".item-subtitle span")
        subtitle = subtitle_text.split(", ") if subtitle_text
        if subtitle && subtitle.length == 3
          entry[:degree] = subtitle[0]
          entry[:fieldOfStudy] = subtitle[1]
          entry[:grade] = subtitle[2]
        elsif subtitle
          # NOTE(review): this stores the split Array, not a String, when the
          # subtitle does not have exactly three parts -- confirm consumers
          # expect that before changing it.
          entry[:fieldOfStudy] = subtitle
        end

        # The last paragraph that is not the activities line wins.
        item.css(".description p").each do |paragraph|
          next if paragraph.text.include?("Activities and Societies: ")

          entry[:summary] = paragraph.text
        end

        put_date_range(entry, item)
        entry
      end

      { history: schools }
    end

    # @return [Hash] `{ history: [...] }` built from `#projects .project`
    def projects
      entries = @doc.css('#projects .project').map do |item|
        entry = {}
        put(entry, :title, text_at(item, ".item-title"))
        put_title_url(entry, item)
        put_date_range(entry, item)
        put(entry, :description, text_at(item, ".description"))
        entry
      end

      { history: entries }
    end

    # @return [Hash] `{ sets: [{ name: ... }, ...] }` from `#skills .skill a`
    def skills
      { sets: @doc.css('#skills .skill a').map { |item| { name: item.text } } }
    end

    # @return [Hash] `{ history: [...] }` listing certifications, then
    #   courses, then test scores
    def qualifications
      { history: [certification_entries, course_entries, score_entries].flatten }
    end

    # @return [Hash] `{ history: [...] }` built from `#awards .award`
    def recognition
      awards = @doc.css('#awards .award').map do |item|
        entry = { category: "Award" }
        put(entry, :title, text_at(item, ".item-title"))
        put(entry, :from, text_at(item, ".item-subtitle"))
        put_date_range(entry, item)
        put(entry, :summary, text_at(item, ".description"))
        entry
      end

      { history: awards }
    end

    # @return [Hash] `{ history: [...] }` built from
    #   `#publications .publication`
    def writing
      publications = @doc.css('#publications .publication').map do |item|
        entry = {}
        put(entry, :title, text_at(item, ".item-title"))
        put_title_url(entry, item)
        put(entry, :publisher, text_at(item, ".item-subtitle"))
        # `css` always returns a (possibly empty) collection, so test for
        # emptiness instead of truthiness to avoid emitting `date: ""`.
        times = item.css(".date-range time")
        entry[:date] = times.map(&:text).join unless times.empty?
        put(entry, :summary, text_at(item, ".description"))
        # entry[:authors] = item.css(".contributors .contributor").map { |c| c.text }.join("") if item.at_css(".contributors .contributor")
        entry
      end

      { history: publications }
    end

    # @return [Hash] `{ history: [...] }` built from `#patents .patent`
    def patents
      entries = @doc.css('#patents .patent').map do |item|
        entry = {}
        put(entry, :title, text_at(item, ".item-title"))
        put_title_url(entry, item)
        put(entry, :date, text_at(item, ".date-range time"))
        range = item.at_css(".date-range")
        if range
          # Status is whatever text sits in the date range outside <time>.
          entry[:status] = range.children.reject { |child| child.name == "time" }.map(&:text).join.strip
        end
        put(entry, :description, text_at(item, ".description"))
        entry
      end

      { history: entries }
    end

    # @return [Hash] `{ list: [...] }` built from `#languages .language`
    def languages
      entries = @doc.css('#languages .language').map do |item|
        entry = {}
        put(entry, :language, text_at(item, ".name"))
        put(entry, :level, text_at(item, ".proficiency"))
        entry
      end

      { list: entries }
    end

    # @return [Hash] `{ list: [{ name: ... }, ...] }` from
    #   `#interests .interest span`
    def interests
      { list: @doc.css('#interests .interest span').map { |item| { name: item.text } } }
    end

    # @return [Hash] `{ history: [...] }` built from `#organizations li`
    def affiliations
      organizations = @doc.css('#organizations li').map do |item|
        entry = {}
        put(entry, :organization, text_at(item, ".item-title"))
        role = text_at(item, ".item-subtitle")
        entry[:roles] = [role] if role
        put_date_range(entry, item)
        put(entry, :summary, text_at(item, ".description"))
        entry
      end

      { history: organizations }
    end

    # @return [Hash] `{ history: [...] }` built from `#volunteering .position`
    def service
      positions = @doc.css('#volunteering .position').map do |item|
        entry = { category: "Volunteer Work" }
        role = text_at(item, ".item-title")
        entry[:roles] = [role] if role
        put(entry, :organization, text_at(item, ".item-subtitle"))
        # entry[:cause] = item.at_css(".cause").text if item.at_css(".cause")
        put_date_range(entry, item)
        put(entry, :summary, text_at(item, ".description"))
        entry
      end

      { history: positions }
    end

    private

    # Words of the `#name` element: [first word, rest]. Empty when the
    # element is missing or blank.
    def name_parts
      node = @doc.at_css('#name')
      return [] unless node

      node.text.strip.split(' ', 2).map(&:strip)
    end

    # Text of the first element under `node` matching `selector`, or nil.
    def text_at(node, selector)
      found = node.at_css(selector)
      found.text if found
    end

    # Stores `value` under `key` unless it is nil.
    def put(hash, key, value)
      hash[key] = value unless value.nil?
    end

    # Copies the first and second `.date-range time` texts into
    # :startDate / :endDate when present.
    def put_date_range(hash, item)
      dates = item.css(".date-range time")
      hash[:startDate] = dates[0].text if dates[0]
      hash[:endDate] = dates[1].text if dates[1]
    end

    # Stores the cleaned href of the title link under :url when the title
    # is a link.
    def put_title_url(hash, item)
      anchor = item.at_css(".item-title a")
      hash[:url] = clean_up_linkedin_redirect_url(anchor["href"]) if anchor
    end

    # Entries for `#certifications .certification`.
    def certification_entries
      @doc.css('#certifications .certification').map do |item|
        entry = { category: "Certification" }
        put(entry, :title, text_at(item, ".item-title"))
        put(entry, :from, text_at(item, ".item-subtitle"))
        put_title_url(entry, item)
        put_date_range(entry, item)
        entry
      end
    end

    # Entries for `#courses .course`; the title is skipped when the course
    # has no <span> instead of raising.
    def course_entries
      @doc.css('#courses .course').map do |item|
        entry = { category: "Course" }
        put(entry, :title, text_at(item, "span"))
        entry
      end
    end

    # Entries for `#scores .score`.
    def score_entries
      @doc.css('#scores .score').map do |item|
        entry = { category: "Test Score" }
        put(entry, :title, text_at(item, ".item-title"))
        # entry[:score] = item.at_css(".item-subtitle").text.gsub(/[^0-9]/, "") if item.at_css(".item-subtitle")
        put_date_range(entry, item)
        put(entry, :summary, text_at(item, ".description"))
        entry
      end
    end

    # Returns the target of a "/redir/redirect?url=..." link, or `link`
    # unchanged when it is not such a link, has no `url` parameter, or
    # cannot be parsed/decoded.
    #
    # `URI.unescape` no longer exists on Ruby 3.0+, so the parameter is
    # decoded with `URI.decode_www_form_component`.
    def clean_up_linkedin_redirect_url(link)
      return link unless link && link.include?("/redir/redirect")

      query = URI(link).query
      return link unless query

      # Match the parameter by prefix so e.g. "trkurl=" is not mistaken
      # for the redirect target.
      target = query.split("&").find { |pair| pair.start_with?("url=") }
      return link unless target

      URI.decode_www_form_component(target.sub("url=", ""))
    rescue URI::InvalidURIError, ArgumentError
      # Best-effort clean-up: keep the original link when it is malformed.
      link
    end
  end
end