jst-parser 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/jst.rb +35 -24
- data/lib/jst/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YmZlMDNmYjY5MTYzYTFjNmQ5NjFiMjhkOTY4NDkwYjhjMWZiOTg3OQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZjI2MGI5ZjlmMGFiYWI1MWUzOWFlOTI4MzAwYTk5YzBjY2M0YjY3ZQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MWNhYWQ2ODA4Y2FiOWQxOTNiNjUzMWNhOWRiYzk0NmM3MDVhODdlMDE3YmRk
|
10
|
+
MzE5MWVmZDU3YmE0OGJkYWU1NTZjNWZhZGRmMGJjYTI3MzUxMDhhMGExZWFj
|
11
|
+
ZDBhODVmNzJiZGQyODhhYjFjOGYwNGE5MjVjMmUwZjFkMDRkYTY=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZjA0YzY4MDJlNDY5ZGU1N2FjZjYyMDFjY2U0NzQ2YjgyNGY0YTRjZDk5ODQ3
|
14
|
+
YjM2OWE3YTA4N2ZjMjNiY2YzODg3MjM4ZTE0MzZjYjA5ODFhMTE2ZGE3ZGM3
|
15
|
+
ZmE4NmY2MzA2ZDM4YzcyNjg0YzY2OWY2YzM5YThkZDkzOGI4ZDI=
|
data/lib/jst.rb
CHANGED
@@ -14,6 +14,7 @@ module JST
|
|
14
14
|
BRANCH_DOD = 'Department of Defense'
|
15
15
|
|
16
16
|
class BadPDFError < StandardError ; end
|
17
|
+
class UnknownPDFParsingError < StandardError ; end
|
17
18
|
|
18
19
|
def parse(pdf_file)
|
19
20
|
unless @debug
|
@@ -48,10 +49,12 @@ module JST
|
|
48
49
|
raise BadPDFError, "Could not parse JST."
|
49
50
|
rescue ArgumentError
|
50
51
|
raise BadPDFError, "PDF text parsing exception."
|
51
|
-
rescue
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
rescue => exc
|
53
|
+
raise UnknownPDFParsingError, "#{exc}"
|
54
|
+
# rescue PDF::Reader::UnknownGlyphWidthError
|
55
|
+
# # Waiting for this exception to be committed from the following pull request:
|
56
|
+
# # https://github.com/yob/pdf-reader/pull/105
|
57
|
+
# raise UnknownPDFParsingError, "PDF text parsing exception."
|
55
58
|
end
|
56
59
|
end
|
57
60
|
end
|
@@ -68,10 +71,12 @@ module JST
|
|
68
71
|
skills_upper_regexp = /(.+)\s+(\d)\s+\w{2}\s+U/
|
69
72
|
skills_vocational_regexp = /(.+)\s+(\d)\s+\w{2}\s+V/
|
70
73
|
skills_graduate_regexp = /(.+)\s+(\d)\s+\w{2}\s+G/
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
74
|
+
ignore_regexp = []
|
75
|
+
ignore_regexp.push (/PRIVACY ACT INFORMATION/)
|
76
|
+
ignore_regexp.push (/\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/)
|
77
|
+
ignore_regexp.push (/None|NONE ASSIGNED/)
|
78
|
+
ignore_regexp.push (/^(\d|L|U|V|G|SH)$/)
|
79
|
+
ignore_regexp.push (/\*\*/)
|
75
80
|
ignore_list = nil
|
76
81
|
@positions = []
|
77
82
|
@skills_all = {}
|
@@ -96,15 +101,15 @@ module JST
|
|
96
101
|
if line.match(experience_section_start)
|
97
102
|
# Reached the job experience section. Begin parsing out.
|
98
103
|
inside_experience_section = true
|
99
|
-
puts "-- -- --
|
104
|
+
puts "-- -- -- JOB EXPERIENCE SECTION START -- -- -- " if @debug
|
100
105
|
next
|
101
106
|
end
|
102
107
|
if line.match(experience_section_end)
|
103
|
-
puts "-- -- --
|
108
|
+
puts "-- -- -- JOB EXPERIENCE SECTION END -- -- -- " if @debug
|
104
109
|
|
105
110
|
# Finished last job position. Append previous job position.
|
106
111
|
if !position_title.empty? && !position_desc.empty?
|
107
|
-
puts '-- --
|
112
|
+
puts '-- -- APPENDING PREVIOUS POSITION -- -- ' if @debug
|
108
113
|
|
109
114
|
append_position(position_branch, position_date_begin, position_date_end, position_title, position_desc)
|
110
115
|
position_branch = ''
|
@@ -117,8 +122,9 @@ module JST
|
|
117
122
|
end
|
118
123
|
|
119
124
|
if inside_experience_section
|
125
|
+
line.strip!
|
120
126
|
if line.match(experience_regexp)
|
121
|
-
puts "
|
127
|
+
puts "-- -- NEW EXPERIENCE" if @debug
|
122
128
|
|
123
129
|
# Determine which branch this job title falls under
|
124
130
|
position_branch = BRANCH_ARMY if line.match(/AR-/)
|
@@ -130,9 +136,10 @@ module JST
|
|
130
136
|
|
131
137
|
# Determine the service date (dd-MMM-yyyy)
|
132
138
|
if date_match = line.match(experience_date)
|
133
|
-
puts " ^^^^^ PARSING DATE ^^^^^^ "
|
134
139
|
position_date_begin = date_match[1] unless date_match[1].nil?
|
140
|
+
puts "-- START DATE: #{position_date_begin}" unless date_match[1].nil?
|
135
141
|
position_date_end = date_match[2] unless date_match[2].nil?
|
142
|
+
puts "-- END DATE: #{position_date_end}" unless date_match[2].nil?
|
136
143
|
end
|
137
144
|
|
138
145
|
# Next line will be the job titles
|
@@ -152,12 +159,12 @@ module JST
|
|
152
159
|
end
|
153
160
|
|
154
161
|
if at_job_title
|
155
|
-
puts "
|
162
|
+
puts "-- JOB TITLE: #{line}" if @debug
|
156
163
|
at_job_title = false
|
157
164
|
position_title = line
|
158
165
|
|
159
166
|
# Next line will be the job description starting point
|
160
|
-
puts "
|
167
|
+
puts "-- JOB DESCRIPTION:"
|
161
168
|
at_job_desc = true
|
162
169
|
next
|
163
170
|
end
|
@@ -177,7 +184,7 @@ module JST
|
|
177
184
|
# Strip out skill name
|
178
185
|
skill_name = skills_match[1].strip!
|
179
186
|
|
180
|
-
puts "
|
187
|
+
puts "-- SKILL: #{skill_name}" if @debug
|
181
188
|
|
182
189
|
# Init skill name key, if none exists
|
183
190
|
@skills_all[skill_name] = 0 if @skills_all[skill_name].nil?
|
@@ -200,9 +207,17 @@ module JST
|
|
200
207
|
end
|
201
208
|
|
202
209
|
if at_job_desc
|
203
|
-
|
204
|
-
|
205
|
-
|
210
|
+
ignore_line = false
|
211
|
+
ignore_regexp.each do |regex|
|
212
|
+
if line.match(regex)
|
213
|
+
ignore_line = true
|
214
|
+
break
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
unless ignore_line
|
219
|
+
puts "-> #{line}" if @debug
|
220
|
+
position_desc += " #{line}"
|
206
221
|
end
|
207
222
|
end
|
208
223
|
end
|
@@ -216,12 +231,8 @@ module JST
|
|
216
231
|
position[:date_begin] = date_begin
|
217
232
|
position[:date_end] = date_end
|
218
233
|
position[:title] = title
|
219
|
-
position[:description] = description
|
234
|
+
position[:description] = description.gsub!(/ /," ")
|
220
235
|
@positions.push(position)
|
221
|
-
|
222
|
-
# @positions[position_title] = position_desc
|
223
|
-
# position_title = ''
|
224
|
-
# position_desc = ''
|
225
236
|
end
|
226
237
|
|
227
238
|
def create_response
|
data/lib/jst/version.rb
CHANGED