jst-parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Y2IyNDAwNGQzZTVlODFhNWJhZGFlYmZlZmQ0ZmQ1YjljYmI5YmFiYQ==
5
+ data.tar.gz: !binary |-
6
+ ZDMwZWYzYTMzY2JjMzUyYWJiN2IzZWUxNTAyOWJhOTZmZDNlMzkwZg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MjAyNjc0ODE2YmZhNGM2NGIzODQwOGE2MjAyNWQyNjM0MTY0YWE3NjM4YWVi
10
+ NDkwOGQ5NjY4MWRiMmVhMjcwODgwZmM2MTMyNDZkMDY3OWY3ZmE4MmNmNzM1
11
+ NDk4MTY0NTRjNTYwYTNkYzM5ZDg3MzAzYzBjMTA5YWQ4OTc2MDc=
12
+ data.tar.gz: !binary |-
13
+ ODYxMTdjNGNjMzBmNjQ0M2ZlZmE0YWIwNjNhYTA2MTQxMTJjOTM1YTBjNTRh
14
+ YjlhNTViMDEwYzU1YTAyNGRiMTEwMWNlMzNkMWRlZmIwMTYxMDdiNGUxNDRj
15
+ MDQ0M2IzNzUzNTQ0NGM2MjI5NjkyNGFlMGM3ZGEzMWMzYTZmYTU=
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ pkg/
3
+ coverage/
4
+ spec/cassettes
5
+ spec/cassettes/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # PDF Parser
4
+ gem 'pdf-reader', '~> 1.3.3'
data/Gemfile.lock ADDED
@@ -0,0 +1,20 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ Ascii85 (1.0.2)
5
+ afm (0.2.0)
6
+ hashery (2.1.0)
7
+ pdf-reader (1.3.3)
8
+ Ascii85 (~> 1.0.0)
9
+ afm (~> 0.2.0)
10
+ hashery (~> 2.0)
11
+ ruby-rc4
12
+ ttfunk
13
+ ruby-rc4 (0.1.5)
14
+ ttfunk (1.0.3)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ pdf-reader (~> 1.3.3)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Chris Little
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ -- Nothing yet. Stay tuned! --
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's Gemfile:
6
+
7
+ gem 'jst-parser'
8
+
9
+ And then execute:
10
+
11
+ $ bundle
12
+
13
+ Or install it yourself as:
14
+
15
+ $ gem install jst-parser
16
+
17
+ ## Usage
18
+
19
+ `JST::Parser#parse` accepts an open PDF `File` (an IO object) and returns a hash containing the following keys:
20
+ name,
21
+ rank,
22
+ education,
23
+ [experience:
24
+ branch,
25
+ date_begin,
26
+ date_end,
27
+ title,
28
+ description],
29
+ skills,
30
+ skills_lower,
31
+ skills_upper,
32
+ skills_vocational,
33
+ skills_graduate,
34
+
35
+ ## Code Example
36
+
37
+ require 'jst'
38
+
39
+ your_pdf = File.open('path/to/pdf')
40
+ parsed_document = JST::Parser.new.parse(your_pdf)
41
+
42
+ parsed_document[:name]
43
+
44
+
45
+ ## Contributing
46
+
47
+ 1. Fork it
48
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
49
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
50
+ 4. Push to the branch (`git push origin my-new-feature`)
51
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
require 'bundler'

# Requiring bundler/gem_tasks already installs the gem tasks
# (build, install, release). Calling Bundler::GemHelper.install_tasks again
# would register every task action a second time, so it is not called here.
require 'bundler/gem_tasks'

require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/ with coloured output.
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = '--color'
  t.pattern = 'spec/**/*_spec.rb'
end

desc 'Run Tests'
task :default => :spec
@@ -0,0 +1,31 @@
1
# Put ./lib on the load path so the version constant can be read from source.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'jst/version'

Gem::Specification.new do |s|
  s.name = 'jst-parser'
  s.version = JST::VERSION
  s.platform = Gem::Platform::RUBY
  s.date = '2013-05-30'

  s.summary = 'Joint Service Transcript (JST) parsing utility.'
  # Adjacent literals joined with a trailing backslash form ONE string, so the
  # published description no longer contains the newlines and indentation that
  # a literal spanning several source lines would embed.
  s.description = 'A PDF parser for the Joint Service Transcript (JST), a standardized ' \
                  'service transcript for Army, Marine Corps, Navy, and Coast Guard personnel. ' \
                  '(https://jst.doded.mil/faq.html) ' \
                  'Returns accumulated skills, military experience, and education as JSON.'
  s.authors = ['Chris Little']
  s.email = 'razenghan@gmail.com'
  s.homepage = 'http://rubygems.org/gems/jst-parser'
  s.license = 'MIT'
  # Package exactly the files tracked by git.
  s.files = `git ls-files`.split($/)
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
  s.require_path = 'lib'

  # PDF Parser
  s.add_dependency 'pdf-reader', '~> 1.3.3'

  # Development
  s.add_development_dependency "bundler", "~> 1.3"
  s.add_development_dependency "rake"
  # The Rakefile requires rspec/core/rake_task, so rake cannot even load
  # without rspec being installed.
  s.add_development_dependency "rspec"
end
data/lib/jst.rb ADDED
@@ -0,0 +1,265 @@
1
+ require 'pdf-reader'
2
+
3
module JST
  # Parses the text of a Joint Service Transcript (JST) PDF into a Hash holding
  # the name, rank, military experience entries and per-skill credit totals.
  #
  #   parser = JST::Parser.new
  #   parser.debug = true                        # optional: trace parsing on stdout
  #   result = parser.parse(File.open('jst.pdf'))
  #   result[:name]
  class Parser
    attr_accessor :debug
    attr_writer :jst_response, :name, :rank, :educations, :positions, :skills_all,
                :skills_lower, :skills_upper, :skills_vocational, :skills_graduate

    BRANCH_ARMY = 'United States Army'
    BRANCH_NAVY = 'United States Navy'
    BRANCH_AIR = 'United States Air Force'
    BRANCH_MARINES = 'United States Marine Corps'
    BRANCH_COAST = 'United States Coast Guard'
    BRANCH_DOD = 'Department of Defense'

    # Raised by #parse when PDF::Reader reports a malformed PDF.
    class BadPDFError < StandardError; end

    # Lines that open and close the experience section of the transcript text.
    SECTION_START = /Military Experience/
    SECTION_END = /College Level Test Scores|Other Learning Experiences/

    # First line of an experience entry: a course/occupation code (or the
    # literal "NONE ASSIGNED") followed by a dd-MMM-yyyy date.
    ENTRY_HEADER = /([A-Z]{2,4}\-.{2,4}\-.{2,4}\s+\d{2}\-\w{3}-\d{4})|(NONE ASSIGNED)\s+\d{2}\-\w{3}-\d{4}/

    # Begin date and optional end date found on an entry header line.
    ENTRY_DATES = /(\d{2}\-[A-Z]{3}\-\d{2,4})\D*(\d{2}\-[A-Z]{3}\-\d{4})?/

    # Code prefix => branch. The leading \b keeps "AR-" from matching inside a
    # date such as "01-MAR-2005". When several prefixes match, the last one
    # listed wins.
    BRANCH_BY_PREFIX = [
      [/\bAR-/, BRANCH_ARMY],
      [/\bAF-/, BRANCH_AIR],
      [/\b(?:NV|NEC|NER|LDO|NWO)-/, BRANCH_NAVY],
      [/\b(?:MC|MCE)-/, BRANCH_MARINES],
      [/\b(?:CG|CGR|CGW)-/, BRANCH_COAST],
      [/\bDD-/, BRANCH_DOD]
    ].freeze

    # Skill rows, tried in this order: "<name> <digit> <2 chars> <level letter>".
    SKILL_LEVELS = [
      [:lower, /(.+)\s+(\d)\s+\w{2}\s+L/],
      [:upper, /(.+)\s+(\d)\s+\w{2}\s+U/],
      [:vocational, /(.+)\s+(\d)\s+\w{2}\s+V/],
      [:graduate, /(.+)\s+(\d)\s+\w{2}\s+G/]
    ].freeze

    # Lines that never belong to a job description.
    IGNORED_DESCRIPTION_LINES = [
      /PRIVACY ACT INFORMATION/,
      /\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/,
      /None|NONE ASSIGNED/,
      /^(\d|L|U|V|G|SH)$/
    ].freeze

    # Convenience wrapper so callers can write JST::Parser.parse(pdf).
    def self.parse(pdf_file)
      new.parse(pdf_file)
    end

    # Parses a JST PDF.
    #
    # pdf_file - the PDF source handed to PDF::Reader.new; it must respond
    #            to #size.
    #
    # Returns the response Hash (see #create_response), or nil when pdf_file is
    # nil or has a size of zero or less.
    # Raises JST::Parser::BadPDFError when PDF::Reader reports a malformed PDF.
    def parse(pdf_file)
      @debug ||= false
      return nil if pdf_file.nil? || pdf_file.size <= 0

      begin
        parse_text(read_pdf_text(pdf_file))
      rescue PDF::Reader::MalformedPDFError
        raise JST::Parser::BadPDFError, "Could not parse JST."
      end
    end

    private

    # Concatenates the text of every page of the PDF.
    def read_pdf_text(pdf_file)
      PDF::Reader.new(pdf_file).pages.map { |page| page.text }.join
    end

    # Runs every extraction step over the transcript text and returns the
    # response Hash.
    def parse_text(pdf_text)
      # Recomputed on every call so a reused parser never keeps stale values.
      @name = extract_field(pdf_text, 'Name')
      @rank = extract_field(pdf_text, 'Rank')
      @status = extract_field(pdf_text, 'Status')

      parse_experience(pdf_text)
      create_response
    end

    # Returns the text following the first "<label>:" up to the end of that
    # line, stripped of surrounding whitespace, or nil when the label is absent.
    # Uses the non-bang #strip: bang methods return nil when nothing changes,
    # which would turn an already-clean value into nil.
    def extract_field(text, label)
      found = text.match(/#{Regexp.escape(label)}:(.+)$/)
      found && found[1].strip
    end

    # Walks the transcript line by line, filling @positions and the @skills_*
    # hashes. Only lines after SECTION_START are considered; scanning stops at
    # the first SECTION_END line.
    def parse_experience(content)
      @positions = []
      @skills_all = {}
      @skills_lower = {}
      @skills_upper = {}
      @skills_vocational = {}
      @skills_graduate = {}

      pending = nil                  # entry currently being assembled
      inside_section = false
      expecting_title = false        # the line after an entry header is its title
      collecting_description = false

      content.split("\n").each do |raw_line|
        line = raw_line.strip
        next if line.empty?

        if line.match(SECTION_START)
          inside_section = true
          debug_log "-- -- -- -- -- -- -- INSIDE EXPERIENCE, PARSING -- -- -- -- -- -- -- "
          next
        end

        if line.match(SECTION_END)
          debug_log "-- -- -- -- -- -- -- FINISHED PARSING -- -- -- -- -- -- -- "
          break
        end

        next unless inside_section

        if line.match(ENTRY_HEADER)
          debug_log "~~~~~ NEW EXPERIENCE: #{line}"
          # Store the previous entry BEFORE reading this header, so each entry
          # keeps its own branch and dates.
          flush_position(pending)
          pending = start_position(line)
          expecting_title = true
          next
        end

        if expecting_title
          debug_log "~~~~~ JOB TITLE: #{line}"
          pending[:title] = line
          expecting_title = false
          debug_log "~~~~~ AT JOB DESC"
          collecting_description = true
          next
        end

        next if record_skill(line)
        next unless collecting_description
        next if IGNORED_DESCRIPTION_LINES.any? { |pattern| line.match(pattern) }

        debug_log "m: #{line}"
        pending[:description_lines] << line
      end

      # Also covers transcripts whose text ends without a SECTION_END line.
      flush_position(pending)
    end

    # Builds the working record for the entry announced by header_line.
    def start_position(header_line)
      entry = { :branch => '', :date_begin => '', :date_end => '',
                :title => '', :description_lines => [] }

      BRANCH_BY_PREFIX.each do |pattern, branch|
        entry[:branch] = branch if header_line.match(pattern)
      end

      dates = header_line.match(ENTRY_DATES)
      if dates
        debug_log " ^^^^^ PARSING DATE ^^^^^^ "
        entry[:date_begin] = dates[1]
        entry[:date_end] = dates[2] unless dates[2].nil?
      end

      entry
    end

    # Appends the pending entry to @positions when it has both a title and a
    # description; incomplete entries are dropped.
    def flush_position(pending)
      return if pending.nil?

      # Lines are joined with a space so words at line breaks do not run together.
      description = pending[:description_lines].join(' ')
      return if pending[:title].empty? || description.empty?

      debug_log '-- -- -- -- -- -- -- APPENDING POSITION -- -- -- -- -- -- -- '
      append_position(pending[:branch], pending[:date_begin], pending[:date_end],
                      pending[:title], description)
    end

    # Adds the credits of a skill row to the overall and per-level totals.
    # Returns true when the line was a skill row, false otherwise.
    def record_skill(line)
      level, pattern = SKILL_LEVELS.find { |_level, candidate| line.match(candidate) }
      return false unless level

      row = line.match(pattern)
      # Non-bang #strip: #strip! returns nil when there is nothing to strip.
      skill_name = row[1].strip
      credits = is_numeric?(row[2]) ? row[2].to_i : 0
      debug_log "**** SKILL: #{skill_name}"

      bucket = skill_bucket(level)
      @skills_all[skill_name] = @skills_all.fetch(skill_name, 0) + credits
      bucket[skill_name] = bucket.fetch(skill_name, 0) + credits
      true
    end

    # Maps a SKILL_LEVELS key to the hash that accumulates that level.
    def skill_bucket(level)
      case level
      when :lower then @skills_lower
      when :upper then @skills_upper
      when :vocational then @skills_vocational
      else @skills_graduate
      end
    end

    # Pushes one experience Hash onto @positions.
    def append_position(branch, date_begin, date_end, title, description)
      @positions.push(
        :branch => branch,
        :date_begin => date_begin,
        :date_end => date_end,
        :title => title,
        :description => description
      )
    end

    # Assembles and returns the response Hash from the parsed state.
    # :education is whatever @educations holds; nothing in this class sets it.
    def create_response
      @jst_response = {
        :name => @name,
        :rank => @rank,
        :education => @educations,
        :experience => @positions,
        :skills => @skills_all,
        :skills_lower => @skills_lower,
        :skills_upper => @skills_upper,
        :skills_vocational => @skills_vocational,
        :skills_graduate => @skills_graduate
      }
    end

    # True when obj can be converted by Kernel#Float.
    def is_numeric?(obj)
      Float(obj)
      true
    rescue StandardError
      false
    end

    # Prints message only when debug tracing is enabled.
    def debug_log(message)
      puts message if @debug
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the jst-parser gem.
module JST
  # Release version string of the gem.
  VERSION = '0.0.1'
end
data/startup.rb ADDED
@@ -0,0 +1,12 @@
1
# `irb -r ./startup.rb`
# Just an easy delete & reinstall mechanism.
#
# Commands run through Kernel#system rather than backticks, so their output is
# shown instead of being captured and thrown away.
require 'fileutils'

# Remove all previous gem installations. Best effort: a non-zero exit status
# from `gem uninstall` is deliberately ignored.
system('gem uninstall -Ia jst-parser')
FileUtils.rm_rf('pkg')

## Install local source code as gem
unless system('rake install')
  warn 'startup.rb: `rake install` failed; the require below may not load the local source.'
end

## Require the gem
require 'jst'
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jst-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Little
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pdf-reader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.3.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.3.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: ! "A PDF parser for the Joint Service Transcript (JST), a standardized\n
56
+ \ service transcript for Army, Marine Corps, Navy, and Coast Guard
57
+ personnel. \n (https://jst.doded.mil/faq.html)\n Returns
58
+ accumulated skills, military experience, and education as JSON."
59
+ email: razenghan@gmail.com
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - .gitignore
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - jst-parser.gemspec
71
+ - lib/jst.rb
72
+ - lib/jst/version.rb
73
+ - startup.rb
74
+ homepage: http://rubygems.org/gems/jst-parser
75
+ licenses:
76
+ - MIT
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ! '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.0.3
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Joint Service Transcript (JST) parsing utility.
98
+ test_files: []