jst-parser 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +8 -8
  2. data/lib/jst.rb +35 -24
  3. data/lib/jst/version.rb +1 -1
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWM5MzYzYmZkMmU0ZDY1YzZkMmY4NmE0YmM2MjE3MTlhYWQ2MzYzOA==
4
+ YmZlMDNmYjY5MTYzYTFjNmQ5NjFiMjhkOTY4NDkwYjhjMWZiOTg3OQ==
5
5
  data.tar.gz: !binary |-
6
- ZjE0Yjc2YTNmMGQxZjNiNGYzOGQxMzk5OWY1ZDZjZmE4NGY1NjQ2Mw==
6
+ ZjI2MGI5ZjlmMGFiYWI1MWUzOWFlOTI4MzAwYTk5YzBjY2M0YjY3ZQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MWNjMGQzYzQ4ZmZmYjcyNjM5ZTJkMzdiMmEzMGZmNGJmMzUwZWM3NzM0MWMx
10
- YTMzZWUyNmI5ODI5NzU1ZjM3MmMxNGNhNjU2YjgyYTg2NWM3OWU5MjI1NTVm
11
- Njc5MWM2YWFmYjA5ZjhkNjdkZDYzNzJjNjZlNTI3NGFkOTYwYmY=
9
+ MWNhYWQ2ODA4Y2FiOWQxOTNiNjUzMWNhOWRiYzk0NmM3MDVhODdlMDE3YmRk
10
+ MzE5MWVmZDU3YmE0OGJkYWU1NTZjNWZhZGRmMGJjYTI3MzUxMDhhMGExZWFj
11
+ ZDBhODVmNzJiZGQyODhhYjFjOGYwNGE5MjVjMmUwZjFkMDRkYTY=
12
12
  data.tar.gz: !binary |-
13
- NDUyNDNmMDVlOTBjYmYyYWMzZWNmMDg2NDgwMzIyMjgzYWVjMjk1ZWQ0MDVk
14
- MjBjY2UwM2NiZTZhOTc2MTEwMzQ3M2IyMWVjODRmNTk0MDhkYWY3ODJmNjAy
15
- NzgwZjI3NjA1MjI1MzgyOTdhZTg1MzkxYTAzMzdjNzQzOTEwNzk=
13
+ ZjA0YzY4MDJlNDY5ZGU1N2FjZjYyMDFjY2U0NzQ2YjgyNGY0YTRjZDk5ODQ3
14
+ YjM2OWE3YTA4N2ZjMjNiY2YzODg3MjM4ZTE0MzZjYjA5ODFhMTE2ZGE3ZGM3
15
+ ZmE4NmY2MzA2ZDM4YzcyNjg0YzY2OWY2YzM5YThkZDkzOGI4ZDI=
data/lib/jst.rb CHANGED
@@ -14,6 +14,7 @@ module JST
14
14
  BRANCH_DOD = 'Department of Defense'
15
15
 
16
16
  class BadPDFError < StandardError ; end
17
+ class UnknownPDFParsingError < StandardError ; end
17
18
 
18
19
  def parse(pdf_file)
19
20
  unless @debug
@@ -48,10 +49,12 @@ module JST
48
49
  raise BadPDFError, "Could not parse JST."
49
50
  rescue ArgumentError
50
51
  raise BadPDFError, "PDF text parsing exception."
51
- rescue PDF::Reader::UnknownGlyphWidthError
52
- # Waiting for this exception to be commited from the following pull request:
53
- # https://github.com/yob/pdf-reader/pull/105
54
- raise BadPDFError, "PDF text parsing exception."
52
+ rescue => exc
53
+ raise UnknownPDFParsingError, "#{exc}"
54
+ # rescue PDF::Reader::UnknownGlyphWidthError
55
+ # # Waiting for this exception to be committed from the following pull request:
56
+ # # https://github.com/yob/pdf-reader/pull/105
57
+ # raise UnknownPDFParsingError, "PDF text parsing exception."
55
58
  end
56
59
  end
57
60
  end
@@ -68,10 +71,12 @@ module JST
68
71
  skills_upper_regexp = /(.+)\s+(\d)\s+\w{2}\s+U/
69
72
  skills_vocational_regexp = /(.+)\s+(\d)\s+\w{2}\s+V/
70
73
  skills_graduate_regexp = /(.+)\s+(\d)\s+\w{2}\s+G/
71
- ignore_privacy_regexp = /PRIVACY ACT INFORMATION/
72
- ignore_date_regexp = /\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/
73
- ignore_misc_regexp = /None|NONE ASSIGNED/
74
- ignore_orphaned_skills = /^(\d|L|U|V|G|SH)$/
74
+ ignore_regexp = []
75
+ ignore_regexp.push (/PRIVACY ACT INFORMATION/)
76
+ ignore_regexp.push (/\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/)
77
+ ignore_regexp.push (/None|NONE ASSIGNED/)
78
+ ignore_regexp.push (/^(\d|L|U|V|G|SH)$/)
79
+ ignore_regexp.push (/\*\*/)
75
80
  ignore_list = nil
76
81
  @positions = []
77
82
  @skills_all = {}
@@ -96,15 +101,15 @@ module JST
96
101
  if line.match(experience_section_start)
97
102
  # Reached the job experience section. Begin parsing out.
98
103
  inside_experience_section = true
99
- puts "-- -- -- -- -- -- -- INSIDE EXPERIENCE, PARSING -- -- -- -- -- -- -- " if @debug
104
+ puts "-- -- -- JOB EXPERIENCE SECTION START -- -- -- " if @debug
100
105
  next
101
106
  end
102
107
  if line.match(experience_section_end)
103
- puts "-- -- -- -- -- -- -- FINISHED PARSING -- -- -- -- -- -- -- " if @debug
108
+ puts "-- -- -- JOB EXPERIENCE SECTION END -- -- -- " if @debug
104
109
 
105
110
  # Finished last job position. Append previous job position.
106
111
  if !position_title.empty? && !position_desc.empty?
107
- puts '-- -- -- -- -- -- -- APPENDING POSITION -- -- -- -- -- -- -- ' if @debug
112
+ puts '-- -- APPENDING PREVIOUS POSITION -- -- ' if @debug
108
113
 
109
114
  append_position(position_branch, position_date_begin, position_date_end, position_title, position_desc)
110
115
  position_branch = ''
@@ -117,8 +122,9 @@ module JST
117
122
  end
118
123
 
119
124
  if inside_experience_section
125
+ line.strip!
120
126
  if line.match(experience_regexp)
121
- puts "~~~~~ NEW EXPERIENCE: #{line}" if @debug
127
+ puts "-- -- NEW EXPERIENCE" if @debug
122
128
 
123
129
  # Determine which branch this job title falls under
124
130
  position_branch = BRANCH_ARMY if line.match(/AR-/)
@@ -130,9 +136,10 @@ module JST
130
136
 
131
137
  # Determine the service date (dd-MMM-yyyy)
132
138
  if date_match = line.match(experience_date)
133
- puts " ^^^^^ PARSING DATE ^^^^^^ "
134
139
  position_date_begin = date_match[1] unless date_match[1].nil?
140
+ puts "-- START DATE: #{position_date_begin}" unless date_match[1].nil?
135
141
  position_date_end = date_match[2] unless date_match[2].nil?
142
+ puts "-- END DATE: #{position_date_end}" unless date_match[2].nil?
136
143
  end
137
144
 
138
145
  # Next line will be the job titles
@@ -152,12 +159,12 @@ module JST
152
159
  end
153
160
 
154
161
  if at_job_title
155
- puts "~~~~~ JOB TITLE: #{line}" if @debug
162
+ puts "-- JOB TITLE: #{line}" if @debug
156
163
  at_job_title = false
157
164
  position_title = line
158
165
 
159
166
  # Next line will be the job description starting point
160
- puts "~~~~~ AT JOB DESC" if @debug
167
+ puts "-- JOB DESCRIPTION:"
161
168
  at_job_desc = true
162
169
  next
163
170
  end
@@ -177,7 +184,7 @@ module JST
177
184
  # Strip out skill name
178
185
  skill_name = skills_match[1].strip!
179
186
 
180
- puts "**** SKILL: #{skill_name}" if @debug
187
+ puts "-- SKILL: #{skill_name}" if @debug
181
188
 
182
189
  # Init skill name key, if none exists
183
190
  @skills_all[skill_name] = 0 if @skills_all[skill_name].nil?
@@ -200,9 +207,17 @@ module JST
200
207
  end
201
208
 
202
209
  if at_job_desc
203
- unless line.match(ignore_privacy_regexp) || line.match(ignore_date_regexp) || line.match(ignore_misc_regexp) || line.match(ignore_orphaned_skills)
204
- puts "m: #{line}" if @debug
205
- position_desc += line
210
+ ignore_line = false
211
+ ignore_regexp.each do |regex|
212
+ if line.match(regex)
213
+ ignore_line = true
214
+ break
215
+ end
216
+ end
217
+
218
+ unless ignore_line
219
+ puts "-> #{line}" if @debug
220
+ position_desc += " #{line}"
206
221
  end
207
222
  end
208
223
  end
@@ -216,12 +231,8 @@ module JST
216
231
  position[:date_begin] = date_begin
217
232
  position[:date_end] = date_end
218
233
  position[:title] = title
219
- position[:description] = description
234
+ position[:description] = description.gsub!(/ /," ")
220
235
  @positions.push(position)
221
-
222
- # @positions[position_title] = position_desc
223
- # position_title = ''
224
- # position_desc = ''
225
236
  end
226
237
 
227
238
  def create_response
data/lib/jst/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module JST
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jst-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Little