resumr 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/resumr.rb +94 -3
- metadata +18 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ec390ba5a36ac086457d9d3f011bcfb45faa65b72a37c887470472441ad691d2
|
|
4
|
+
data.tar.gz: f8e0f03baffef2ce9c05167af5b5eb13fecbc10d0bc4f0fe76d13130fa8d42b9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: feaf08ef21f23f66133de45e342517cd8a0c4df01e3f72e4a6baa2572547505558e0edebe53284512c80eeee37cef876aa4ca2ca142f68500b2f7cdaa718687f
|
|
7
|
+
data.tar.gz: 26a6171bcca22eee7402d51ab088bb8118195445304e88ae27db3157a99af235883961d7c20b716c66ad3b9e886585f70bc498a2e562f378bc6f152a7e75f13a
|
data/lib/resumr.rb
CHANGED
|
@@ -1,9 +1,100 @@
|
|
|
1
|
+
require 'pdf-reader'
|
|
2
|
+
|
|
1
3
|
class Resumr
|
|
2
|
-
def self.
|
|
3
|
-
require 'pdf-reader'
|
|
4
|
+
def self.new(file)
|
|
4
5
|
reader = PDF::Reader.new(file)
|
|
5
6
|
info = reader.info
|
|
6
7
|
pages = reader.pages
|
|
7
|
-
|
|
8
|
+
|
|
9
|
+
text = pages.map{|x| x.text}.reduce{|x,y| x << y}.gsub(/Page \w of \w/,"")
|
|
10
|
+
right, left = "", ""
|
|
11
|
+
|
|
12
|
+
text.each_line do |line|
|
|
13
|
+
if line.length <= 40 || line.include?("Top Skills") || line.include?("Contact")
|
|
14
|
+
left += line.strip + "\n"
|
|
15
|
+
else
|
|
16
|
+
left += line.slice(0,40).strip + "\n"
|
|
17
|
+
right += line.slice(40,line.length).strip + "\n"
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
linkedin_url_start_index = left.index("www.linkedin.com")
|
|
22
|
+
linkedin_url_end_index = left.index("(LinkedIn)")
|
|
23
|
+
|
|
24
|
+
resume = {
|
|
25
|
+
source: info[:Author],
|
|
26
|
+
time: Time.new(info[:CreationDate].gsub(/D:/,"")),
|
|
27
|
+
text: text,
|
|
28
|
+
left: left,
|
|
29
|
+
right: right,
|
|
30
|
+
pdf_reader: reader,
|
|
31
|
+
sections: subsection_title_list(left).merge!(subsection_title_list(right,true)),
|
|
32
|
+
linkedin_url: left[linkedin_url_start_index..linkedin_url_end_index-1].gsub("\n","").strip
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
name = resume[:sections][:beginning].lines[1].split("\n").first
|
|
36
|
+
resume.merge!({name: name})
|
|
37
|
+
|
|
38
|
+
return resume
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def self.subsection_title_list(str, with_first_block_sepparate = false)
|
|
42
|
+
possible_titles = [
|
|
43
|
+
["Top Skills", "Principais competências"],
|
|
44
|
+
["Certifications"],
|
|
45
|
+
["Contact", "Contato", "Contatar"],
|
|
46
|
+
["Experience","Experiência"],
|
|
47
|
+
["Education", "Formação Acadêmica"],
|
|
48
|
+
["Summary","Resumo"],
|
|
49
|
+
["Languages"],
|
|
50
|
+
["Publications"]
|
|
51
|
+
]
|
|
52
|
+
indexes = {}
|
|
53
|
+
possible_titles.each do |tit|
|
|
54
|
+
new_keyvalue = {}
|
|
55
|
+
title = tit[0]
|
|
56
|
+
index = nil
|
|
57
|
+
tit.each do |synonym|
|
|
58
|
+
index = str.index(synonym) if index.nil?
|
|
59
|
+
end
|
|
60
|
+
if ! index.nil?
|
|
61
|
+
new_keyvalue.store(
|
|
62
|
+
index,
|
|
63
|
+
title.gsub(" ","_").downcase.to_sym
|
|
64
|
+
)
|
|
65
|
+
indexes.merge!(new_keyvalue)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
retorn = {}
|
|
70
|
+
sorted_indexes = indexes.keys.sort
|
|
71
|
+
|
|
72
|
+
if with_first_block_sepparate
|
|
73
|
+
first_block_index = sorted_indexes[0]
|
|
74
|
+
|
|
75
|
+
retorn.merge!(
|
|
76
|
+
{beginning:
|
|
77
|
+
str[0..(first_block_index-1)]
|
|
78
|
+
}
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
sorted_indexes.each.with_index do |current_block_index, k|
|
|
84
|
+
new_keyvalue = {}
|
|
85
|
+
next_block_index = 0
|
|
86
|
+
if k == (sorted_indexes.length - 1)
|
|
87
|
+
next_block_index = str.length
|
|
88
|
+
else
|
|
89
|
+
next_block_index = sorted_indexes[k+1]
|
|
90
|
+
end
|
|
91
|
+
new_keyvalue.store(
|
|
92
|
+
indexes[current_block_index],
|
|
93
|
+
str[current_block_index..next_block_index].lines[1..-1].join
|
|
94
|
+
)
|
|
95
|
+
retorn.merge!(new_keyvalue)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
return retorn
|
|
8
99
|
end
|
|
9
100
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: resumr
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.1
|
|
4
|
+
version: 0.0.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sal, the Developer
|
|
@@ -14,17 +14,31 @@ dependencies:
|
|
|
14
14
|
name: pdf-reader
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- -
|
|
17
|
+
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
19
|
version: 1.0.0
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- -
|
|
24
|
+
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: 1.0.0
|
|
27
|
-
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: awesome_print
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.8'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.8'
|
|
41
|
+
description: A resume parsing gem. Specifically for LinkedIn PDFs
|
|
28
42
|
email: sal@salthedeveloper.com
|
|
29
43
|
executables: []
|
|
30
44
|
extensions: []
|