vaextractor 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/vaextractor.rb +263 -0
  3. metadata +74 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 99733aa02a6893e211ea821ee43ad63b558b161a
4
+ data.tar.gz: 21f5b410828fe8e1e1f66dd72308f726ce4a54e8
5
+ SHA512:
6
+ metadata.gz: 30a3983c99e90003bcfffc1b85d98162402d3d298ec33af539cdf076d7fdfda744a7766c0bcdd3b09117a375d0da014f7dd76a9406ba0a2ecc72e1f959324f44
7
+ data.tar.gz: ebe76eb202d0ff2819f2a66d7f25006cc35d3bf6f21ae89dd9a76a7f178e71418194705cf2e1becb79b846d7507656d563314ddaef97cdeff1c029c290f8a2c2
@@ -0,0 +1,263 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2016 Aaron Y. Lee MD MSCI
3
+ # University of Washington, Seattle WA
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ #
18
+ #
19
+ require 'textoken'
20
+
21
# Rule-based extractor that pulls visual acuity (VA) mentions out of free
# text and assigns each one to the right (OD) or left (OS) eye.
#
# Tokenisation is delegated to the external Textoken() helper; only its
# `.tokens` list of strings is used here.
class VAExtractor
  # Raised by #extract when an acuity is found but no eye can be assigned to
  # it by any of the fallback rules.
  class ErrorLateralityNotFound < StandardError; end

  # One acuity mention. After the leading separator capture is dropped the
  # remaining captures are laid out as:
  #   [0] numerator ("20", "3E" or "E")   [1] denominator
  #   [2] "+" / "-" modifier              [3] modifier digit
  #   [4] "HM" / "CF" / "LP" / "NLP"      [8] distance digits   [9] distance unit
  # NOTE(review): the class `[+|-]` also accepts a literal "|"; left unchanged.
  @@varegex = /(\s|^|~|:)(20|3E|E)\/\s*(\d+)\s*([+|-])*\s*(\d)*|(HM|CF|LP|NLP)(\W+(@|at|x)*\s*((\d+)(\s*'|\s*"|\s*in|\s*ft|\s*feet)*|face)*|$)/

  # Recognised 20/x denominators, ordered from best to worst acuity.
  @@snellenlevels = [10,15,20,25,30,40,50,60,70,80,100,125,150,200,250,300,400,600,800]

  # Upper-cased laterality words mapped to the canonical eye code.
  @@validtokens = {"OD" => "OD", "RE" => "OD", "RIGHT" => "OD", "R" => "OD",
                   "L" => "OS", "OS" => "OS", "LE" => "OS", "LEFT" => "OS",
                   "BOTH" => "OU", "BE" => "OU", "OU" => "OU", "BILATERAL" => "OU"}

  # Fixed logMAR values returned for the non-numeric acuity codes.
  OFF_CHART_LOGMAR = {"CF" => 2.0, "HM" => 2.4, "LP" => 2.7, "NLP" => 3.0}.freeze

  # Builds the denominator => logMAR table (log10(denominator / 20)) for every
  # entry of @@snellenlevels.
  def initialize
    @usedsnellen = {}
    @@snellenlevels.each do |level|
      @usedsnellen[level] = -Math.log(20.0 / level) / Math.log(10.0)
    end
  end

  # Pairs every token of +s+ with the character offset at which it occurs.
  #
  # The search for each token starts after the end of the previous one, so a
  # repeated token gets its own offset instead of re-matching the first
  # occurrence. Tokens that cannot be located verbatim in +s+ are skipped.
  #
  # Returns an array of [token, offset] pairs.
  def aligntokens(s)
    cursor = 0
    aligned = []
    Textoken(s).tokens.each do |token|
      offset = s.index(token, cursor)
      next if offset.nil?
      aligned.push [token, offset]
      cursor = offset + token.size
    end
    aligned
  end

  # Last-resort laterality guess: counts the OD and OS words in the whole text
  # (OU words are ignored) and returns the code with the highest count, or nil
  # when the text contains neither.
  def runentirefreq(rawtext)
    scores = Hash.new(0)
    aligntokens(rawtext).each do |word, _offset|
      eye = @@validtokens[word.upcase]
      scores[eye] += 1 if eye && eye != "OU"
    end
    return nil if scores.empty?
    scores.sort_by { |_eye, count| count }.last.first
  end

  # Converts one parsed acuity (capture layout described at @@varegex) into
  # logMAR.
  #
  # Returns [logmar, parts]:
  # * 20/x and 3E/x forms: parts is the first four captures; when a "+"/"-"
  #   has no digit the digit is reported as "1". Each modifier step moves the
  #   value one fifth of the way to the neighbouring chart line.
  # * CF / HM / LP / NLP: a fixed value with parts [code, distance, unit, nil].
  # * [nil, parts] when the denominator is not a chart level (or is zero for
  #   the 3E form); [nil, nil] when nothing is recognised.
  #
  # A modifier that would step past either end of the chart is ignored.
  # The +va+ array passed in is not modified.
  def logmar(va)
    parts = va[0...4]
    case va[0]
    when "20"
      level = va[1].to_i
      score = @usedsnellen[level]
      return [nil, parts] if score.nil?
      sign = va[2]
      if sign == "+" || sign == "-"
        parts[3] ||= "1"
        here = @@snellenlevels.index(level)
        neighbour = sign == "+" ? here - 1 : here + 1
        # Guard both ends: index -1 would silently wrap to the worst line and
        # an index past the end has no table entry.
        if neighbour >= 0 && neighbour < @@snellenlevels.size
          target = @usedsnellen[@@snellenlevels[neighbour]]
          score = 1.0 * parts[3].to_i * (target - score) / 5.0 + score
        end
      end
      [score, parts]
    when "3E", "3", "E"
      denom = va[1].to_i
      # A zero denominator would yield an infinite logMAR.
      return [nil, parts] if denom <= 0
      [-Math.log(3.0 / denom) / Math.log(10.0), parts]
    else
      fixed = OFF_CHART_LOGMAR[va[4]]
      return [fixed, [va[4], va[8], va[9], nil]] if fixed
      [nil, nil]
    end
  end

  # Scans +lines+ in the order given and returns the first OD/OS code found
  # (OU words are skipped). Stops with nil at the first blank line; returns
  # nil when no line contains a laterality word.
  def searchpriorlines(lines)
    lines.each do |line|
      return nil if line.strip.empty?
      Textoken(line).tokens.each do |word|
        eye = @@validtokens[word.upcase]
        return eye if eye && eye != "OU"
      end
    end
    nil
  end

  # Chooses the laterality word on a line that most plausibly belongs to the
  # acuity at character offset +pos+.
  #
  # +tokens+ is a list of [token, offset] pairs (as built by #aligntokens).
  # Each laterality word is penalised for the "wall" tokens lying between it
  # and +pos+ (10 for ".", "!", "?"; 5 for "," and "and"). The lowest penalty
  # wins, then the smallest distance; remaining ties go to the earliest token.
  # +linestr+ is accepted for interface compatibility and is not used.
  #
  # Returns [eye_code, {penalty => [[eye_code, distance], ...]}], or nil when
  # the line has no laterality word.
  def findlaterality(pos, tokens, linestr)
    walls = {"." => 10, "!" => 10, "?" => 10, "," => 5, "and" => 5}
    token_at = {}
    tokens.each { |word, offset| token_at[offset] = word }

    answers = {}
    tokens.each do |word, offset|
      eye = @@validtokens[word.upcase]
      next if eye.nil?
      lo, hi = offset > pos ? [pos, offset] : [offset, pos]
      penalty = 0
      token_at.each do |at, between|
        penalty += walls.fetch(between, 0) if at >= lo && at < hi
      end
      (answers[penalty] ||= []).push [eye, (offset - pos).abs]
    end
    return nil if answers.empty?

    nearest = answers.min_by { |penalty, _| penalty }.last.min_by { |_, dist| dist }
    [nearest[0], answers]
  end

  # Extracts the best acuity per eye from a note.
  #
  # Lines starting with "IOP", "Ta " or "Tp" (any case for the last) are
  # ignored, as are matches whose modifier digit is 5 or more. Laterality is
  # resolved, in order, by:
  #   1. a laterality word on the same line (priority 5; OU counts for both eyes),
  #   2. a laterality word in up to three preceding lines (priority 3),
  #   3. layout: matches on this line are OD and those on the next line OS, or
  #      exactly two matches on one line are OD then OS (priority 0),
  #   4. the most frequent OD/OS word in the whole text (priority 0).
  # Rules 2-4 are only tried while no acuity has been recorded yet; afterwards
  # an acuity without a same-line laterality word is dropped.
  #
  # For each eye the lowest logMAR is kept among candidates whose priority is
  # at least that of the current best. Candidates with no logMAR (denominator
  # not on the chart) are ignored.
  #
  # Pass debug: true to print a trace to stdout.
  #
  # Returns {:RE, :LE, :RElogmar, :LElogmar}. All four are nil when no acuity
  # was found at all; otherwise :RE/:LE are the parts arrays from #logmar
  # ([nil, nil, nil, nil] for an eye without a usable acuity) and the logMAR
  # values are rounded to four decimals.
  #
  # Raises ErrorLateralityNotFound when none of the rules can assign an eye.
  def extract(rawtext, debug: false)
    lines = rawtext.split("\n")
    vas = {"OD" => [], "OS" => []}
    alreadychecked = {}
    debugtxt = ""

    lines.each_with_index do |line, i|
      stripped = line.strip
      next if stripped =~ /^IOP/ || stripped =~ /^Ta\s/ || stripped =~ /^Tp/i
      next if alreadychecked.has_key?(i)

      arr = line.scan(@@varegex)
      line.enum_for(:scan, @@varegex).each do |val|
        # Must be read before any other regexp runs in this method.
        pos = Regexp.last_match.begin(0)
        val.shift
        next if val[3] != nil && val[3].to_i >= 5

        tokens = aligntokens(line)
        lat, debughash = findlaterality(pos, tokens, line)
        if debug
          debugtxt << "NEW VA DETECTED\n"
          debugtxt << "#{i > 0 ? lines[i - 1] : nil}\n#{line}\n#{lines[i + 1]}\n"
          debugtxt << "#{val}\n#{pos}\n#{tokens}\n#{debughash}\n"
        end

        if lat == "OU"
          vas["OD"].push [val, i, 5]
          vas["OS"].push [val, i, 5]
        elsif lat
          vas[lat].push [val, i, 5]
        elsif vas["OD"].empty? && vas["OS"].empty?
          # Clamp at the first line so the window never wraps to the end.
          lat = searchpriorlines(lines[[i - 3, 0].max...i].reverse)
          if lat
            vas[lat].push [val, i, 3]
          else
            place_unlabelled(val, i, arr, lines, rawtext, vas, alreadychecked)
          end
        end
      end
    end

    if vas["OD"].empty? && vas["OS"].empty?
      return {:RE => nil, :LE => nil, :RElogmar => nil, :LElogmar => nil}
    end

    if debug
      puts "=================NEW PT"
      puts rawtext
      puts "===DEBUG"
      puts debugtxt
      puts "===OD"
    end
    right = best_acuity(vas["OD"], lines, debug)
    puts "===OS" if debug
    left = best_acuity(vas["OS"], lines, debug)

    {:RE => right[2], :LE => left[2], :RElogmar => right[1], :LElogmar => left[1]}
  end

  private

  # Assigns eyes to an acuity that has no laterality word nearby, using the
  # layout rules and finally the whole-text frequency (rules 3 and 4 of
  # #extract). Scan rows are copied, never shifted in place, so a later match
  # on the same line still sees intact rows.
  def place_unlabelled(val, i, arr, lines, rawtext, vas, alreadychecked)
    nextrows = (lines[i + 1] || "").scan(@@varegex)

    if nextrows.count > 0 && arr.count > 0
      usable_rows(arr).each { |fields| vas["OD"].push [fields, i, 0] }
      usable_rows(nextrows).each { |fields| vas["OS"].push [fields, i + 1, 0] }
      alreadychecked[i + 1] = 1
      return
    end

    if arr.count == 2
      first, second = arr.map { |row| row.drop(1) }
      return if first[3].to_i >= 5 || second[3].to_i >= 5
      vas["OD"].push [first, i, 0]
      vas["OS"].push [second, i, 0]
      return
    end

    # Worst case: count all occurrences of right/left words and take the
    # most frequent one.
    lat = runentirefreq(rawtext)
    if lat.nil?
      raise ErrorLateralityNotFound, "laterality not found for acuity on line #{i + 1}"
    end
    vas[lat].push [val, i, 0]
  end

  # Drops the separator capture from each scan row and rejects numeric
  # acuities whose modifier digit is 5 or more.
  def usable_rows(rows)
    rows.map { |row| row.drop(1) }.reject { |f| f[0] != nil && f[3].to_i >= 5 }
  end

  # Picks the best [priority, logmar, parts] triple from +candidates+ (a list
  # of [captures, line_index, priority]); returns an all-nil triple when none
  # of them has a logMAR value.
  def best_acuity(candidates, lines, debug)
    best = nil
    candidates.each do |varr, line, priority|
      lva = logmar(varr)
      if debug
        puts "new va"
        p varr
        p lines[line]
        p priority
        p lva
      end
      next if lva[0].nil?
      if best.nil? || (best[0] <= priority && lva[0] < best[1])
        best = [priority, lva[0].round(4), lva[1]]
      end
    end
    best || [nil, nil, [nil, nil, nil, nil]]
  end
end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vaextractor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Aaron Y. Lee MD MSCI
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-10-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: textoken
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.1.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.1.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 5.9.1
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 5.9.1
41
+ description: |
42
+ vaextractor uses rule-based NLP strategy to extract Snellen visual acuities
43
+ from unstructured ophthalmology clinical notes.
44
+ email: aaronylee@gmail.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - lib/vaextractor.rb
50
+ homepage: http://github.org/ayl/vaextractor
51
+ licenses:
52
+ - GNU GPLv3
53
+ metadata: {}
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.4.8
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Rule based NLP library to extract visual acuities
74
+ test_files: []