jst-parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Y2IyNDAwNGQzZTVlODFhNWJhZGFlYmZlZmQ0ZmQ1YjljYmI5YmFiYQ==
5
+ data.tar.gz: !binary |-
6
+ ZDMwZWYzYTMzY2JjMzUyYWJiN2IzZWUxNTAyOWJhOTZmZDNlMzkwZg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MjAyNjc0ODE2YmZhNGM2NGIzODQwOGE2MjAyNWQyNjM0MTY0YWE3NjM4YWVi
10
+ NDkwOGQ5NjY4MWRiMmVhMjcwODgwZmM2MTMyNDZkMDY3OWY3ZmE4MmNmNzM1
11
+ NDk4MTY0NTRjNTYwYTNkYzM5ZDg3MzAzYzBjMTA5YWQ4OTc2MDc=
12
+ data.tar.gz: !binary |-
13
+ ODYxMTdjNGNjMzBmNjQ0M2ZlZmE0YWIwNjNhYTA2MTQxMTJjOTM1YTBjNTRh
14
+ YjlhNTViMDEwYzU1YTAyNGRiMTEwMWNlMzNkMWRlZmIwMTYxMDdiNGUxNDRj
15
+ MDQ0M2IzNzUzNTQ0NGM2MjI5NjkyNGFlMGM3ZGEzMWMzYTZmYTU=
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ pkg/
3
+ coverage/
4
+ spec/cassettes
5
+ spec/cassettes/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # PDF Parser
4
+ gem 'pdf-reader', '~> 1.3.3'
data/Gemfile.lock ADDED
@@ -0,0 +1,20 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ Ascii85 (1.0.2)
5
+ afm (0.2.0)
6
+ hashery (2.1.0)
7
+ pdf-reader (1.3.3)
8
+ Ascii85 (~> 1.0.0)
9
+ afm (~> 0.2.0)
10
+ hashery (~> 2.0)
11
+ ruby-rc4
12
+ ttfunk
13
+ ruby-rc4 (0.1.5)
14
+ ttfunk (1.0.3)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ pdf-reader (~> 1.3.3)
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Chris Little
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ -- Nothing yet. Stay tuned! --
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's Gemfile:
6
+
7
+ gem 'jst-parser'
8
+
9
+ And then execute:
10
+
11
+ $ bundle
12
+
13
+ Or install it yourself as:
14
+
15
+ $ gem install jst-parser
16
+
17
+ ## Usage
18
+
19
+ `JST::Parser#parse` accepts an open PDF `File` object and returns a hash containing the following keys:
20
+ name,
21
+ rank,
22
+ education,
23
+ [experience:
24
+ branch,
25
+ date_begin,
26
+ date_end,
27
+ title,
28
+ description],
29
+ skills,
30
+ skills_lower,
31
+ skills_upper,
32
+ skills_vocational,
33
+ skills_graduate,
34
+
35
+ ## Code Example
36
+
37
+ require 'jst'
38
+
39
+ your_pdf = File.open('path/to/pdf')
40
+ parsed_document = JST::Parser.new.parse(your_pdf)
41
+
42
+ parsed_document[:name]
43
+
44
+
45
+ ## Contributing
46
+
47
+ 1. Fork it
48
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
49
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
50
+ 4. Push to the branch (`git push origin my-new-feature`)
51
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
# Rake tasks for the gem: Bundler's packaging tasks plus an RSpec runner.
require 'bundler'

# Requiring bundler/gem_tasks already calls Bundler::GemHelper.install_tasks.
# Calling it a second time would register every build/install/release action
# twice, so it is intentionally not repeated here.
require 'bundler/gem_tasks'

require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/ with coloured output.
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = '--color'
  t.pattern = 'spec/**/*_spec.rb'
end

desc 'Run Tests'
task :default => :spec
@@ -0,0 +1,31 @@
1
# Gem specification for jst-parser.
# Puts lib/ on the load path so the version constant can be read without the
# gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'jst/version'

Gem::Specification.new do |s|
  s.name        = 'jst-parser'
  s.version     = JST::VERSION
  s.platform    = Gem::Platform::RUBY
  s.date        = '2013-05-30'

  s.summary     = 'Joint Service Transcript (JST) parsing utility.'
  # Built from adjacent literals so the published description carries no stray
  # newlines or source indentation.
  s.description = 'A PDF parser for the Joint Service Transcript (JST), a standardized ' \
                  'service transcript for Army, Marine Corps, Navy, and Coast Guard personnel ' \
                  '(https://jst.doded.mil/faq.html). ' \
                  'Returns accumulated skills, military experience, and education as a Hash.'
  s.authors     = ['Chris Little']
  s.email       = 'razenghan@gmail.com'
  s.homepage    = 'http://rubygems.org/gems/jst-parser'
  s.license     = 'MIT'
  # Package exactly the files tracked by git.
  s.files       = `git ls-files`.split($/)
  s.test_files  = s.files.grep(%r{^(test|spec|features)/})
  s.require_path = 'lib'

  # PDF Parser
  s.add_dependency 'pdf-reader', '~> 1.3.3'

  # Development
  s.add_development_dependency 'bundler', '~> 1.3'
  s.add_development_dependency 'rake'
  # The Rakefile's default task loads rspec/core/rake_task.
  s.add_development_dependency 'rspec'
end
data/lib/jst.rb ADDED
@@ -0,0 +1,265 @@
1
+ require 'pdf-reader'
2
+
3
module JST
  # Parses the text of a Joint Service Transcript (JST) PDF into a plain Hash.
  #
  # Example:
  #   parser = JST::Parser.new
  #   result = parser.parse(File.open('path/to/jst.pdf'))
  #   result[:name]
  #
  # Set +debug+ to true to print a trace of the parsing steps to stdout.
  class Parser
    attr_accessor :debug
    attr_writer :jst_response, :name, :rank, :educations, :positions, :skills_all,
                :skills_lower, :skills_upper, :skills_vocational, :skills_graduate

    BRANCH_ARMY    = 'United States Army'
    BRANCH_NAVY    = 'United States Navy'
    BRANCH_AIR     = 'United States Air Force'
    BRANCH_MARINES = 'United States Marine Corps'
    BRANCH_COAST   = 'United States Coast Guard'
    BRANCH_DOD     = 'Department of Defense'

    # Raised by #parse when the PDF library reports a malformed document.
    class BadPDFError < StandardError ; end

    # Lines that open / close the experience section of the transcript text.
    EXPERIENCE_SECTION_START = /Military Experience/
    EXPERIENCE_SECTION_END   = /College Level Test Scores|Other Learning Experiences/

    # A line that starts a new position: an occupation code (or NONE ASSIGNED)
    # followed by a dd-MMM-yyyy date.
    EXPERIENCE_HEADER = /([A-Z]{2,4}\-.{2,4}\-.{2,4}\s+\d{2}\-\w{3}-\d{4})|(NONE ASSIGNED)\s+\d{2}\-\w{3}-\d{4}/

    # Begin date and optional end date on a header line.
    EXPERIENCE_DATE = /(\d{2}\-[A-Z]{3}\-\d{2,4})\D*(\d{2}\-[A-Z]{3}\-\d{4})?/

    # Occupation-code prefixes for each branch. The \b anchor keeps a prefix
    # such as "AR-" from matching inside a month abbreviation ("01-MAR-2005").
    # When several patterns match, the last one in this list wins.
    BRANCH_PATTERNS = [
      [/\bAR-/,                     BRANCH_ARMY],
      [/\bAF-/,                     BRANCH_AIR],
      [/\b(?:NV|NEC|NER|LDO|NWO)-/, BRANCH_NAVY],
      [/\b(?:MC|MCE)-/,             BRANCH_MARINES],
      [/\b(?:CG|CGR|CGW)-/,         BRANCH_COAST],
      [/\bDD-/,                     BRANCH_DOD]
    ].freeze

    # Credit lines: "<skill name> <single-digit credits> <2-char unit> <level>".
    # Tried in this order; the first pattern that matches decides the level.
    SKILL_PATTERNS = [
      [:lower,      /(.+)\s+(\d)\s+\w{2}\s+L/],
      [:upper,      /(.+)\s+(\d)\s+\w{2}\s+U/],
      [:vocational, /(.+)\s+(\d)\s+\w{2}\s+V/],
      [:graduate,   /(.+)\s+(\d)\s+\w{2}\s+G/]
    ].freeze

    # Lines that never belong to a job description: privacy banner, page-date
    # stamps, "None" placeholders and orphaned fragments of a credit line.
    IGNORED_DESCRIPTION_LINES = [
      /PRIVACY ACT INFORMATION/,
      /\(\d{1,2}\/\d{1,2}\)\(\d{1,2}\/\d{1,2}\)/,
      /None|NONE ASSIGNED/,
      /^(\d|L|U|V|G|SH)$/
    ].freeze

    # Reads a JST PDF and returns the extracted data.
    #
    # pdf_file - an object that responds to #size and is accepted by
    #            PDF::Reader.new (for example an open File).
    #
    # Returns a Hash with the keys :name, :rank, :education, :experience,
    # :skills, :skills_lower, :skills_upper, :skills_vocational and
    # :skills_graduate, or nil when pdf_file is nil or empty.
    # Raises JST::Parser::BadPDFError when the PDF is malformed.
    def parse(pdf_file)
      @debug ||= false
      return if pdf_file.nil? || pdf_file.size <= 0

      # Concatenate the text of every page.
      pdf_text = PDF::Reader.new(pdf_file).pages.map { |page| page.text }.join

      # Always re-read the header fields from this document so that a second
      # call on the same parser does not re-use values from an earlier one.
      @name   = extract_field(pdf_text, 'Name')
      @rank   = extract_field(pdf_text, 'Rank')
      @status = extract_field(pdf_text, 'Status')

      parse_experience(pdf_text)
      create_response
      @jst_response
    rescue PDF::Reader::MalformedPDFError
      raise JST::Parser::BadPDFError, "Could not parse JST."
    end

    private

    # Returns the text following "<label>:" up to the end of that line with
    # leading whitespace removed, or nil when the label does not occur.
    # Uses the non-destructive lstrip: the bang variants return nil when there
    # is nothing to remove, which would wipe out an already clean value.
    def extract_field(text, label)
      match = text.match(/#{Regexp.escape(label)}:(.+)$/)
      match && match[1].lstrip
    end

    # Walks the transcript text line by line and fills @positions and the
    # @skills_* hashes (skill name => accumulated credits).
    #
    # Only lines after the experience-section start marker are interpreted;
    # parsing stops at the first end marker or at the end of the text.
    # Returns the Array of parsed positions.
    def parse_experience(content)
      @positions = []
      @skills_all = {}
      @skills_lower = {}
      @skills_upper = {}
      @skills_vocational = {}
      @skills_graduate = {}

      current = nil           # position currently being assembled
      inside_section = false
      at_job_title = false    # true when the next line is the job title

      content.split("\n").each do |raw_line|
        line = raw_line.strip
        next if line.empty?

        if line =~ EXPERIENCE_SECTION_START
          # Reached the job experience section. Begin parsing out.
          inside_section = true
          debug_log "-- -- -- -- -- -- -- INSIDE EXPERIENCE, PARSING -- -- -- -- -- -- -- "
          next
        end

        if line =~ EXPERIENCE_SECTION_END
          debug_log "-- -- -- -- -- -- -- FINISHED PARSING -- -- -- -- -- -- -- "
          break
        end

        next unless inside_section

        if line =~ EXPERIENCE_HEADER
          debug_log "~~~~~ NEW EXPERIENCE: #{line}"
          # Store the previous position BEFORE reading this header, so each
          # position keeps the branch and dates of its own header line.
          flush_position(current)
          current = start_position(line)
          at_job_title = true
          next
        end

        if at_job_title
          debug_log "~~~~~ JOB TITLE: #{line}"
          current[:title] = line
          at_job_title = false
          debug_log "~~~~~ AT JOB DESC"
          next
        end

        # Credit lines feed the skill totals and are never description text.
        next if record_skill(line)

        # Text before the first header belongs to no position.
        next if current.nil? || ignorable_description_line?(line)

        debug_log "m: #{line}"
        current[:lines] << line
      end

      # Finished the last job position (end marker or end of text): store it.
      flush_position(current)
      @positions
    end

    # Builds the working record for the position announced by a header line.
    # Branch and dates default to '' when they cannot be determined.
    def start_position(header)
      branch = ''
      BRANCH_PATTERNS.each { |pattern, name| branch = name if header =~ pattern }

      dates = header.match(EXPERIENCE_DATE)
      debug_log " ^^^^^ PARSING DATE ^^^^^^ " if dates

      {
        :branch     => branch,
        :date_begin => dates ? dates[1] : '',
        :date_end   => (dates && dates[2]) || '',
        :title      => '',
        :lines      => []
      }
    end

    # Appends the working record to @positions when it has both a title and at
    # least one description line; incomplete records are dropped.
    # Description lines are joined with a single space so that words at line
    # breaks do not run together.
    def flush_position(current)
      return if current.nil? || current[:title].empty? || current[:lines].empty?

      debug_log '-- -- -- -- -- -- -- APPENDING POSITION -- -- -- -- -- -- -- '
      append_position(current[:branch], current[:date_begin], current[:date_end],
                      current[:title], current[:lines].join(' '))
    end

    # Adds the credits of a credit line to the overall and per-level totals.
    # Returns true when the line was a credit line, false otherwise.
    def record_skill(line)
      SKILL_PATTERNS.each do |level, pattern|
        match = line.match(pattern)
        next if match.nil?

        # Non-destructive strip: strip! returns nil for an already clean name.
        name = match[1].strip
        credits = match[2].to_i
        debug_log "**** SKILL: #{name}"

        level_totals = instance_variable_get("@skills_#{level}")
        [@skills_all, level_totals].each do |totals|
          totals[name] = totals.fetch(name, 0) + credits
        end
        return true
      end
      false
    end

    # True when the line matches one of IGNORED_DESCRIPTION_LINES.
    def ignorable_description_line?(line)
      IGNORED_DESCRIPTION_LINES.any? { |pattern| line =~ pattern }
    end

    # Pushes one position Hash onto @positions.
    def append_position(branch, date_begin, date_end, title, description)
      @positions.push(
        :branch      => branch,
        :date_begin  => date_begin,
        :date_end    => date_end,
        :title       => title,
        :description => description
      )
    end

    # Assembles @jst_response from the values gathered so far and returns it.
    # :education is taken from @educations, which this parser does not fill in
    # itself (coursework parsing is not implemented yet).
    def create_response
      @jst_response = {
        :name              => @name,
        :rank              => @rank,
        :education         => @educations,
        :experience        => @positions,
        :skills            => @skills_all,
        :skills_lower      => @skills_lower,
        :skills_upper      => @skills_upper,
        :skills_vocational => @skills_vocational,
        :skills_graduate   => @skills_graduate
      }
    end

    # Prints a trace message when debugging is switched on.
    def debug_log(message)
      puts message if @debug
    end

    # True when obj can be converted by Kernel#Float, false otherwise.
    # No longer called inside this class; kept so existing callers still work.
    def is_numeric?(obj)
      Float(obj)
      true
    rescue ArgumentError, TypeError
      false
    end
  end
end
@@ -0,0 +1,3 @@
1
module JST
  # Release number of the jst-parser gem. Frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
data/startup.rb ADDED
@@ -0,0 +1,12 @@
1
+ # Usage: `irb -r ./startup.rb` (the relative paths below presumably assume the gem's root directory - confirm)
2
+ # Development helper: deletes and reinstalls the gem from local source. Backticks discard each command's output and do not check its exit status.
3
+
4
+ # Remove all previously installed copies of the gem and any packages built into pkg/
5
+ `gem uninstall -Ia jst-parser`
6
+ `rm -rf pkg/`
7
+
8
+ ## Install the local source code as a gem via the `install` rake task
9
+ `rake install`
10
+
11
+ ## Require the gem so it is loaded in the irb session
12
+ require 'jst'
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jst-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Little
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pdf-reader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.3.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.3.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: ! "A PDF parser for the Joint Service Transcript (JST), a standardized\n
56
+ \ service transcript for Army, Marine Corps, Navy, and Coast Guard
57
+ personnel. \n (https://jst.doded.mil/faq.html)\n Returns
58
+ accumulated skills, military experience, and education as JSON."
59
+ email: razenghan@gmail.com
60
+ executables: []
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - .gitignore
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - jst-parser.gemspec
71
+ - lib/jst.rb
72
+ - lib/jst/version.rb
73
+ - startup.rb
74
+ homepage: http://rubygems.org/gems/jst-parser
75
+ licenses:
76
+ - MIT
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ! '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.0.3
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Joint Service Transcript (JST) parsing utility.
98
+ test_files: []