jst-parser 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +8 -8
  2. data/lib/jst.rb +35 -24
  3. data/lib/jst/version.rb +1 -1
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWM5MzYzYmZkMmU0ZDY1YzZkMmY4NmE0YmM2MjE3MTlhYWQ2MzYzOA==
4
+ YmZlMDNmYjY5MTYzYTFjNmQ5NjFiMjhkOTY4NDkwYjhjMWZiOTg3OQ==
5
5
  data.tar.gz: !binary |-
6
- ZjE0Yjc2YTNmMGQxZjNiNGYzOGQxMzk5OWY1ZDZjZmE4NGY1NjQ2Mw==
6
+ ZjI2MGI5ZjlmMGFiYWI1MWUzOWFlOTI4MzAwYTk5YzBjY2M0YjY3ZQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MWNjMGQzYzQ4ZmZmYjcyNjM5ZTJkMzdiMmEzMGZmNGJmMzUwZWM3NzM0MWMx
10
- YTMzZWUyNmI5ODI5NzU1ZjM3MmMxNGNhNjU2YjgyYTg2NWM3OWU5MjI1NTVm
11
- Njc5MWM2YWFmYjA5ZjhkNjdkZDYzNzJjNjZlNTI3NGFkOTYwYmY=
9
+ MWNhYWQ2ODA4Y2FiOWQxOTNiNjUzMWNhOWRiYzk0NmM3MDVhODdlMDE3YmRk
10
+ MzE5MWVmZDU3YmE0OGJkYWU1NTZjNWZhZGRmMGJjYTI3MzUxMDhhMGExZWFj
11
+ ZDBhODVmNzJiZGQyODhhYjFjOGYwNGE5MjVjMmUwZjFkMDRkYTY=
12
12
  data.tar.gz: !binary |-
13
- NDUyNDNmMDVlOTBjYmYyYWMzZWNmMDg2NDgwMzIyMjgzYWVjMjk1ZWQ0MDVk
14
- MjBjY2UwM2NiZTZhOTc2MTEwMzQ3M2IyMWVjODRmNTk0MDhkYWY3ODJmNjAy
15
- NzgwZjI3NjA1MjI1MzgyOTdhZTg1MzkxYTAzMzdjNzQzOTEwNzk=
13
+ ZjA0YzY4MDJlNDY5ZGU1N2FjZjYyMDFjY2U0NzQ2YjgyNGY0YTRjZDk5ODQ3
14
+ YjM2OWE3YTA4N2ZjMjNiY2YzODg3MjM4ZTE0MzZjYjA5ODFhMTE2ZGE3ZGM3
15
+ ZmE4NmY2MzA2ZDM4YzcyNjg0YzY2OWY2YzM5YThkZDkzOGI4ZDI=
data/lib/jst.rb CHANGED
@@ -14,6 +14,7 @@ module JST
14
14
  BRANCH_DOD = 'Department of Defense'
15
15
 
16
16
  class BadPDFError < StandardError ; end
17
+ class UnknownPDFParsingError < StandardError ; end
17
18
 
18
19
  def parse(pdf_file)
19
20
  unless @debug
@@ -48,10 +49,12 @@ module JST
48
49
  raise BadPDFError, "Could not parse JST."
49
50
  rescue ArgumentError
50
51
  raise BadPDFError, "PDF text parsing exception."
51
- rescue PDF::Reader::UnknownGlyphWidthError
52
- # Waiting for this exception to be committed from the following pull request:
53
- # https://github.com/yob/pdf-reader/pull/105
54
- raise BadPDFError, "PDF text parsing exception."
52
+ rescue => exc
53
+ raise UnknownPDFParsingError, "#{exc}"
54
+ # rescue PDF::Reader::UnknownGlyphWidthError
55
+ # # Waiting for this exception to be committed from the following pull request:
56
+ # # https://github.com/yob/pdf-reader/pull/105
57
+ # raise UnknownPDFParsingError, "PDF text parsing exception."
55
58
  end
56
59
  end
57
60
  end
@@ -68,10 +71,12 @@ module JST
68
71
  skills_upper_regexp = /(.+)\s+(\d)\s+\w{2}\s+U/
69
72
  skills_vocational_regexp = /(.+)\s+(\d)\s+\w{2}\s+V/
70
73
  skills_graduate_regexp = /(.+)\s+(\d)\s+\w{2}\s+G/
71
- ignore_privacy_regexp = /PRIVACY ACT INFORMATION/
72
- ignore_date_regexp = /\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/
73
- ignore_misc_regexp = /None|NONE ASSIGNED/
74
- ignore_orphaned_skills = /^(\d|L|U|V|G|SH)$/
74
+ ignore_regexp = []
75
+ ignore_regexp.push (/PRIVACY ACT INFORMATION/)
76
+ ignore_regexp.push (/\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/)
77
+ ignore_regexp.push (/None|NONE ASSIGNED/)
78
+ ignore_regexp.push (/^(\d|L|U|V|G|SH)$/)
79
+ ignore_regexp.push (/\*\*/)
75
80
  ignore_list = nil
76
81
  @positions = []
77
82
  @skills_all = {}
@@ -96,15 +101,15 @@ module JST
96
101
  if line.match(experience_section_start)
97
102
  # Reached the job experience section. Begin parsing out.
98
103
  inside_experience_section = true
99
- puts "-- -- -- -- -- -- -- INSIDE EXPERIENCE, PARSING -- -- -- -- -- -- -- " if @debug
104
+ puts "-- -- -- JOB EXPERIENCE SECTION START -- -- -- " if @debug
100
105
  next
101
106
  end
102
107
  if line.match(experience_section_end)
103
- puts "-- -- -- -- -- -- -- FINISHED PARSING -- -- -- -- -- -- -- " if @debug
108
+ puts "-- -- -- JOB EXPERIENCE SECTION END -- -- -- " if @debug
104
109
 
105
110
  # Finished last job position. Append previous job position.
106
111
  if !position_title.empty? && !position_desc.empty?
107
- puts '-- -- -- -- -- -- -- APPENDING POSITION -- -- -- -- -- -- -- ' if @debug
112
+ puts '-- -- APPENDING PREVIOUS POSITION -- -- ' if @debug
108
113
 
109
114
  append_position(position_branch, position_date_begin, position_date_end, position_title, position_desc)
110
115
  position_branch = ''
@@ -117,8 +122,9 @@ module JST
117
122
  end
118
123
 
119
124
  if inside_experience_section
125
+ line.strip!
120
126
  if line.match(experience_regexp)
121
- puts "~~~~~ NEW EXPERIENCE: #{line}" if @debug
127
+ puts "-- -- NEW EXPERIENCE" if @debug
122
128
 
123
129
  # Determine which branch this job title falls under
124
130
  position_branch = BRANCH_ARMY if line.match(/AR-/)
@@ -130,9 +136,10 @@ module JST
130
136
 
131
137
  # Determine the service date (dd-MMM-yyyy)
132
138
  if date_match = line.match(experience_date)
133
- puts " ^^^^^ PARSING DATE ^^^^^^ "
134
139
  position_date_begin = date_match[1] unless date_match[1].nil?
140
+ puts "-- START DATE: #{position_date_begin}" unless date_match[1].nil?
135
141
  position_date_end = date_match[2] unless date_match[2].nil?
142
+ puts "-- END DATE: #{position_date_end}" unless date_match[2].nil?
136
143
  end
137
144
 
138
145
  # Next line will be the job titles
@@ -152,12 +159,12 @@ module JST
152
159
  end
153
160
 
154
161
  if at_job_title
155
- puts "~~~~~ JOB TITLE: #{line}" if @debug
162
+ puts "-- JOB TITLE: #{line}" if @debug
156
163
  at_job_title = false
157
164
  position_title = line
158
165
 
159
166
  # Next line will be the job description starting point
160
- puts "~~~~~ AT JOB DESC" if @debug
167
+ puts "-- JOB DESCRIPTION:"
161
168
  at_job_desc = true
162
169
  next
163
170
  end
@@ -177,7 +184,7 @@ module JST
177
184
  # Strip out skill name
178
185
  skill_name = skills_match[1].strip!
179
186
 
180
- puts "**** SKILL: #{skill_name}" if @debug
187
+ puts "-- SKILL: #{skill_name}" if @debug
181
188
 
182
189
  # Init skill name key, if none exists
183
190
  @skills_all[skill_name] = 0 if @skills_all[skill_name].nil?
@@ -200,9 +207,17 @@ module JST
200
207
  end
201
208
 
202
209
  if at_job_desc
203
- unless line.match(ignore_privacy_regexp) || line.match(ignore_date_regexp) || line.match(ignore_misc_regexp) || line.match(ignore_orphaned_skills)
204
- puts "m: #{line}" if @debug
205
- position_desc += line
210
+ ignore_line = false
211
+ ignore_regexp.each do |regex|
212
+ if line.match(regex)
213
+ ignore_line = true
214
+ break
215
+ end
216
+ end
217
+
218
+ unless ignore_line
219
+ puts "-> #{line}" if @debug
220
+ position_desc += " #{line}"
206
221
  end
207
222
  end
208
223
  end
@@ -216,12 +231,8 @@ module JST
216
231
  position[:date_begin] = date_begin
217
232
  position[:date_end] = date_end
218
233
  position[:title] = title
219
- position[:description] = description
234
+ position[:description] = description.gsub!(/ /," ")
220
235
  @positions.push(position)
221
-
222
- # @positions[position_title] = position_desc
223
- # position_title = ''
224
- # position_desc = ''
225
236
  end
226
237
 
227
238
  def create_response
data/lib/jst/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module JST
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jst-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Little