jst-parser 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/jst.rb +35 -24
- data/lib/jst/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YmZlMDNmYjY5MTYzYTFjNmQ5NjFiMjhkOTY4NDkwYjhjMWZiOTg3OQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZjI2MGI5ZjlmMGFiYWI1MWUzOWFlOTI4MzAwYTk5YzBjY2M0YjY3ZQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MWNhYWQ2ODA4Y2FiOWQxOTNiNjUzMWNhOWRiYzk0NmM3MDVhODdlMDE3YmRk
|
10
|
+
MzE5MWVmZDU3YmE0OGJkYWU1NTZjNWZhZGRmMGJjYTI3MzUxMDhhMGExZWFj
|
11
|
+
ZDBhODVmNzJiZGQyODhhYjFjOGYwNGE5MjVjMmUwZjFkMDRkYTY=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZjA0YzY4MDJlNDY5ZGU1N2FjZjYyMDFjY2U0NzQ2YjgyNGY0YTRjZDk5ODQ3
|
14
|
+
YjM2OWE3YTA4N2ZjMjNiY2YzODg3MjM4ZTE0MzZjYjA5ODFhMTE2ZGE3ZGM3
|
15
|
+
ZmE4NmY2MzA2ZDM4YzcyNjg0YzY2OWY2YzM5YThkZDkzOGI4ZDI=
|
data/lib/jst.rb
CHANGED
@@ -14,6 +14,7 @@ module JST
|
|
14
14
|
BRANCH_DOD = 'Department of Defense'
|
15
15
|
|
16
16
|
class BadPDFError < StandardError ; end
|
17
|
+
class UnknownPDFParsingError < StandardError ; end
|
17
18
|
|
18
19
|
def parse(pdf_file)
|
19
20
|
unless @debug
|
@@ -48,10 +49,12 @@ module JST
|
|
48
49
|
raise BadPDFError, "Could not parse JST."
|
49
50
|
rescue ArgumentError
|
50
51
|
raise BadPDFError, "PDF text parsing exception."
|
51
|
-
rescue
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
rescue => exc
|
53
|
+
raise UnknownPDFParsingError, "#{exc}"
|
54
|
+
# rescue PDF::Reader::UnknownGlyphWidthError
|
55
|
+
# # Waiting for this exception to be committed from the following pull request:
|
56
|
+
# # https://github.com/yob/pdf-reader/pull/105
|
57
|
+
# raise UnknownPDFParsingError, "PDF text parsing exception."
|
55
58
|
end
|
56
59
|
end
|
57
60
|
end
|
@@ -68,10 +71,12 @@ module JST
|
|
68
71
|
skills_upper_regexp = /(.+)\s+(\d)\s+\w{2}\s+U/
|
69
72
|
skills_vocational_regexp = /(.+)\s+(\d)\s+\w{2}\s+V/
|
70
73
|
skills_graduate_regexp = /(.+)\s+(\d)\s+\w{2}\s+G/
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
74
|
+
ignore_regexp = []
|
75
|
+
ignore_regexp.push (/PRIVACY ACT INFORMATION/)
|
76
|
+
ignore_regexp.push (/\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/)
|
77
|
+
ignore_regexp.push (/None|NONE ASSIGNED/)
|
78
|
+
ignore_regexp.push (/^(\d|L|U|V|G|SH)$/)
|
79
|
+
ignore_regexp.push (/\*\*/)
|
75
80
|
ignore_list = nil
|
76
81
|
@positions = []
|
77
82
|
@skills_all = {}
|
@@ -96,15 +101,15 @@ module JST
|
|
96
101
|
if line.match(experience_section_start)
|
97
102
|
# Reached the job experience section. Begin parsing out.
|
98
103
|
inside_experience_section = true
|
99
|
-
puts "-- -- --
|
104
|
+
puts "-- -- -- JOB EXPERIENCE SECTION START -- -- -- " if @debug
|
100
105
|
next
|
101
106
|
end
|
102
107
|
if line.match(experience_section_end)
|
103
|
-
puts "-- -- --
|
108
|
+
puts "-- -- -- JOB EXPERIENCE SECTION END -- -- -- " if @debug
|
104
109
|
|
105
110
|
# Finished last job position. Append previous job position.
|
106
111
|
if !position_title.empty? && !position_desc.empty?
|
107
|
-
puts '-- --
|
112
|
+
puts '-- -- APPENDING PREVIOUS POSITION -- -- ' if @debug
|
108
113
|
|
109
114
|
append_position(position_branch, position_date_begin, position_date_end, position_title, position_desc)
|
110
115
|
position_branch = ''
|
@@ -117,8 +122,9 @@ module JST
|
|
117
122
|
end
|
118
123
|
|
119
124
|
if inside_experience_section
|
125
|
+
line.strip!
|
120
126
|
if line.match(experience_regexp)
|
121
|
-
puts "
|
127
|
+
puts "-- -- NEW EXPERIENCE" if @debug
|
122
128
|
|
123
129
|
# Determine which branch this job title falls under
|
124
130
|
position_branch = BRANCH_ARMY if line.match(/AR-/)
|
@@ -130,9 +136,10 @@ module JST
|
|
130
136
|
|
131
137
|
# Determine the service date (dd-MMM-yyyy)
|
132
138
|
if date_match = line.match(experience_date)
|
133
|
-
puts " ^^^^^ PARSING DATE ^^^^^^ "
|
134
139
|
position_date_begin = date_match[1] unless date_match[1].nil?
|
140
|
+
puts "-- START DATE: #{position_date_begin}" unless date_match[1].nil?
|
135
141
|
position_date_end = date_match[2] unless date_match[2].nil?
|
142
|
+
puts "-- END DATE: #{position_date_end}" unless date_match[2].nil?
|
136
143
|
end
|
137
144
|
|
138
145
|
# Next line will be the job titles
|
@@ -152,12 +159,12 @@ module JST
|
|
152
159
|
end
|
153
160
|
|
154
161
|
if at_job_title
|
155
|
-
puts "
|
162
|
+
puts "-- JOB TITLE: #{line}" if @debug
|
156
163
|
at_job_title = false
|
157
164
|
position_title = line
|
158
165
|
|
159
166
|
# Next line will be the job description starting point
|
160
|
-
puts "
|
167
|
+
puts "-- JOB DESCRIPTION:"
|
161
168
|
at_job_desc = true
|
162
169
|
next
|
163
170
|
end
|
@@ -177,7 +184,7 @@ module JST
|
|
177
184
|
# Strip out skill name
|
178
185
|
skill_name = skills_match[1].strip!
|
179
186
|
|
180
|
-
puts "
|
187
|
+
puts "-- SKILL: #{skill_name}" if @debug
|
181
188
|
|
182
189
|
# Init skill name key, if none exists
|
183
190
|
@skills_all[skill_name] = 0 if @skills_all[skill_name].nil?
|
@@ -200,9 +207,17 @@ module JST
|
|
200
207
|
end
|
201
208
|
|
202
209
|
if at_job_desc
|
203
|
-
|
204
|
-
|
205
|
-
|
210
|
+
ignore_line = false
|
211
|
+
ignore_regexp.each do |regex|
|
212
|
+
if line.match(regex)
|
213
|
+
ignore_line = true
|
214
|
+
break
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
unless ignore_line
|
219
|
+
puts "-> #{line}" if @debug
|
220
|
+
position_desc += " #{line}"
|
206
221
|
end
|
207
222
|
end
|
208
223
|
end
|
@@ -216,12 +231,8 @@ module JST
|
|
216
231
|
position[:date_begin] = date_begin
|
217
232
|
position[:date_end] = date_end
|
218
233
|
position[:title] = title
|
219
|
-
position[:description] = description
|
234
|
+
position[:description] = description.gsub!(/ /," ")
|
220
235
|
@positions.push(position)
|
221
|
-
|
222
|
-
# @positions[position_title] = position_desc
|
223
|
-
# position_title = ''
|
224
|
-
# position_desc = ''
|
225
236
|
end
|
226
237
|
|
227
238
|
def create_response
|
data/lib/jst/version.rb
CHANGED