vaextractor 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/vaextractor.rb +263 -0
- metadata +74 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 99733aa02a6893e211ea821ee43ad63b558b161a
|
4
|
+
data.tar.gz: 21f5b410828fe8e1e1f66dd72308f726ce4a54e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 30a3983c99e90003bcfffc1b85d98162402d3d298ec33af539cdf076d7fdfda744a7766c0bcdd3b09117a375d0da014f7dd76a9406ba0a2ecc72e1f959324f44
|
7
|
+
data.tar.gz: ebe76eb202d0ff2819f2a66d7f25006cc35d3bf6f21ae89dd9a76a7f178e71418194705cf2e1becb79b846d7507656d563314ddaef97cdeff1c029c290f8a2c2
|
data/lib/vaextractor.rb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright 2016 Aaron Y. Lee MD MSCI
|
3
|
+
# University of Washington, Seattle WA
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
#
|
18
|
+
#
|
19
|
+
require 'textoken'
|
20
|
+
|
21
|
+
# Rule-based extractor that pulls visual acuity notations out of free text
# and assigns each one to the right (OD) or left (OS) eye.
#
# Typical use:
#   VAExtractor.new.extract(note_text)
#   # => {:RE => [...], :LE => [...], :RElogmar => Float, :LElogmar => Float}
class VAExtractor
  # Raised by #extract when an acuity was matched but no eye could be
  # assigned to it by any of the laterality strategies.
  class ErrorLateralityNotFound < StandardError; end

  # Matches either a fraction-style acuity (20/x, 3E/x, E/x with an optional
  # +n / -n modifier) or one of the CF/HM/LP/NLP notations with an optional
  # distance. The sign class is [+-]; a literal "|" must not be consumed as a
  # sign, otherwise "20/20 | 20/30" swallows the second acuity.
  @@varegex = /(\s|^|~|:)(20|3E|E)\/\s*(\d+)\s*([+-])*\s*(\d)*|(HM|CF|LP|NLP)(\W+(@|at|x)*\s*((\d+)(\s*'|\s*"|\s*in|\s*ft|\s*feet)*|face)*|$)/
  # Chart denominators, in ascending order (the +/- interpolation relies on
  # this ordering to find the neighbouring line).
  @@snellenlevels = [10,15,20,25,30,40,50,60,70,80,100,125,150,200,250,300,400,600,800]
  # Upper-cased token => canonical eye label.
  @@validtokens = {"OD" => "OD", "RE" => "OD", "RIGHT" => "OD", "R" => "OD",
                   "L" => "OS", "OS" => "OS", "LE" => "OS", "LEFT" => "OS",
                   "BOTH" => "OU", "BE" => "OU", "OU" => "OU", "BILATERAL" => "OU"}
  # Fixed scores returned by #logmar for the non-fraction notations.
  @@lowvisionlogmar = {"CF" => 2.0, "HM" => 2.4, "LP" => 2.7, "NLP" => 3.0}

  # Precomputes the score of every chart line in @@snellenlevels
  # as -log10(20 / denominator).
  def initialize
    @usedsnellen = {}
    @@snellenlevels.each do |level|
      @usedsnellen[level] = -Math.log(20.0 / level) / Math.log(10.0)
    end
  end

  # Tokenizes +s+ and pairs every token with its character offset in +s+.
  #
  # s - the String to tokenize.
  #
  # Returns an Array of [token, offset] pairs in token order. The search for
  # each token starts after the end of the previous one, so repeated tokens
  # get distinct offsets. Tokens that cannot be located in +s+ are skipped.
  def aligntokens(s)
    cursor = 0
    Textoken(s).tokens.each_with_object([]) do |token, aligned|
      start = s.index(token, cursor)
      next if start.nil?
      aligned.push [token, start]
      cursor = start + token.size
    end
  end

  # Counts every OD and OS token in +rawtext+ (OU tokens are ignored) and
  # returns the label that occurs most often, or nil when there is none.
  def runentirefreq(rawtext)
    scores = Hash.new(0)
    aligntokens(rawtext).each do |token, _offset|
      eye = @@validtokens[token.upcase]
      scores[eye] += 1 if eye and eye != "OU"
    end
    return nil if scores.empty?
    scores.sort_by { |_eye, count| count }.last.first
  end

  # Converts one regex capture row (leading separator group already removed)
  # into a score.
  #
  # va - Array of captures: va[0] numerator, va[1] denominator, va[2] sign,
  #      va[3] modifier digit, va[4] CF/HM/LP/NLP, va[8] distance, va[9] unit.
  #      The array is not modified.
  #
  # Returns [score, parts]:
  # * "20/x": -log10(20/x). A "+n" / "-n" modifier moves the score n fifths of
  #   the way toward the next smaller / larger chart denominator (n defaults
  #   to 1). When there is no such chart line the base score is kept.
  # * "3E/x", "3/x", "E/x": -log10(3/x).
  # * CF/HM/LP/NLP: the fixed value from @@lowvisionlogmar, with parts
  #   [notation, distance, unit, nil].
  # * A non-positive denominator gives [nil, parts]; anything else [nil, nil].
  def logmar(va)
    case va[0]
    when "20"
      denom = va[1].to_i
      parts = va[0...4]
      return nil, parts unless denom > 0
      score = snellen_logmar(denom)
      if va[2] == "+" or va[2] == "-"
        parts[3] = "1" if parts[3].nil?
        neighbor = adjacent_level(denom, va[2])
        if neighbor
          score = 1.0 * parts[3].to_i * (snellen_logmar(neighbor) - score) / 5.0 + score
        end
      end
      return score, parts
    when "3E", "3", "E"
      denom = va[1].to_i
      return nil, va[0...4] unless denom > 0
      return -Math.log(3.0 / denom) / Math.log(10.0), va[0...4]
    end

    fixed = @@lowvisionlogmar[va[4]]
    return fixed, [va[4], va[8], va[9], nil] if fixed
    return nil, nil
  end

  # Scans +lines+ in the given order for the first OD/OS token.
  #
  # lines - Array of Strings, nearest line first.
  #
  # Returns "OD" or "OS", or nil when a blank line is reached first or no
  # such token exists. OU tokens are skipped.
  def searchpriorlines(lines)
    lines.each do |line|
      return nil if line.strip == ""
      Textoken(line).tokens.each do |token|
        eye = @@validtokens[token.upcase]
        return eye if eye and eye != "OU"
      end
    end
    nil
  end

  # Picks the eye label on the current line that best belongs to the acuity
  # at character offset +pos+.
  #
  # pos     - Integer offset of the acuity match within the line.
  # tokens  - Array of [token, offset] pairs as returned by #aligntokens.
  # linestr - the line itself (unused; kept for interface compatibility).
  #
  # Every laterality token is scored by the summed weight of the "wall"
  # tokens (sentence punctuation, comma, "and") lying between it and +pos+.
  # The lowest score wins, ties being broken by character distance.
  #
  # Returns [label, answers] where answers maps score => [[label, distance]],
  # or nil when the line holds no laterality token.
  def findlaterality(pos, tokens, linestr)
    walls = {"." => 10, "!" => 10, "?" => 10, "," => 5, "and" => 5}
    token_at = {}
    tokens.each { |token, offset| token_at[offset] = token }

    answers = {}
    tokens.each do |token, offset|
      eye = @@validtokens[token.upcase]
      next if eye.nil?
      from, to = [offset, pos].minmax
      score = (from...to).inject(0) { |total, j| total + walls.fetch(token_at[j], 0) }
      (answers[score] ||= []).push [eye, (offset - pos).abs]
    end
    return nil if answers.empty?

    candidates = answers.sort_by { |score, _entries| score }.first.last
    nearest = candidates.sort_by { |entry| entry[1] }.first
    return nearest[0], answers
  end

  # Extracts the best acuity per eye from a free-text note.
  #
  # rawtext - the note as a String; it is processed line by line.
  #
  # Lines starting with IOP, "Ta " or Tp are ignored. For every acuity match
  # the eye is resolved, in order, by: a laterality token on the same line;
  # a token in up to three preceding lines; the assumption that two
  # unlabelled acuities (same line or two consecutive lines) are OD then OS;
  # and finally the most frequent OD/OS token in the whole note. The last
  # three strategies are only tried while nothing has been recorded yet; an
  # unlabelled acuity found after that is dropped.
  #
  # Returns a Hash with :RE / :LE (Arrays of the matched parts) and
  # :RElogmar / :LElogmar (scores rounded to 4 places). When no acuity is
  # found at all every value is nil; when only one eye has a value the other
  # eye's parts are [nil, nil, nil, nil].
  #
  # Raises ErrorLateralityNotFound when an acuity exists but none of the
  # strategies yields an eye.
  def extract(rawtext)
    lines = rawtext.split("\n")
    vas = {"OD" => [], "OS" => []}
    alreadychecked = {}

    lines.each_with_index do |line, i|
      stripped = line.strip
      next if stripped =~ /^IOP/ or stripped =~ /^Ta\s/ or stripped =~ /^Tp/i
      next if alreadychecked.has_key?(i)

      # Capture rows without the leading separator group. Built as copies so
      # that no row is ever shifted twice.
      arr = line.scan(@@varegex).map { |row| row.drop(1) }
      tokens = nil

      line.enum_for(:scan, @@varegex).each do |match|
        # Must be read before any other regex operation in this method.
        pos = Regexp.last_match.begin(0)
        val = match.drop(1)
        next if val[3] != nil and val[3].to_i >= 5

        tokens ||= aligntokens(line)
        lat, = findlaterality(pos, tokens, line)

        if lat == "OU"
          vas["OD"].push [val, i, 5]
          vas["OS"].push [val, i, 5]
        elsif lat
          vas[lat].push [val, i, 5]
        elsif vas["OD"].empty? and vas["OS"].empty?
          # Clamp the window so the first lines of a note do not wrap around
          # to the end of the array.
          prior = lines[[i - 3, 0].max...i].reverse
          lat = searchpriorlines(prior)
          if lat
            vas[lat].push [val, i, 3]
          else
            # most likely the VAs are either two in one line OD/OS or on two consecutive lines
            nextline = lines[i + 1]
            arr2 = nextline ? nextline.scan(@@varegex).map { |row| row.drop(1) } : []
            if !arr2.empty? and !arr.empty?
              arr.each do |row|
                next if row[0] != nil and row[3].to_i >= 5
                vas["OD"].push [row, i, 0]
              end
              arr2.each do |row|
                next if row[0] != nil and row[3].to_i >= 5
                vas["OS"].push [row, i + 1, 0]
              end
              alreadychecked[i + 1] = 1
            elsif arr.count == 2
              next if arr[0][3].to_i >= 5
              next if arr[1][3].to_i >= 5
              vas["OD"].push [arr[0], i, 0]
              vas["OS"].push [arr[1], i, 0]
            else
              # worst case scenario, count up all the occurrences of r/l and then take highest occurring freq
              lat = runentirefreq(rawtext)
              if lat.nil?
                raise ErrorLateralityNotFound, "Laterality not found for #{val.inspect}"
              end
              vas[lat].push [val, i, 0]
            end
          end
        end
      end
    end

    if vas["OD"].empty? and vas["OS"].empty?
      return {:RE => nil, :LE => nil, :RElogmar => nil, :LElogmar => nil}
    end

    od = best_acuity(vas["OD"])
    os = best_acuity(vas["OS"])
    {:RE => od[2], :LE => os[2], :RElogmar => od[1], :LElogmar => os[1]}
  end

  private

  # Score for a 20/denom acuity: the precomputed chart value when available,
  # otherwise the same formula applied directly.
  def snellen_logmar(denom)
    @usedsnellen.fetch(denom) { -Math.log(20.0 / denom) / Math.log(10.0) }
  end

  # Chart denominator next to +denom+: the next smaller one for "+", the next
  # larger one for "-". Returns nil at the edge of the chart.
  def adjacent_level(denom, sign)
    if sign == "+"
      @@snellenlevels.select { |level| level < denom }.max
    else
      @@snellenlevels.select { |level| level > denom }.min
    end
  end

  # Reduces the [captures, line, priority] candidates of one eye to
  # [priority, rounded score, parts]. A later candidate replaces the current
  # one only if its priority is not lower and its score is smaller.
  # Candidates without a usable score are ignored. Returns a nil-filled
  # placeholder when nothing qualifies.
  def best_acuity(candidates)
    best = nil
    candidates.each do |varr, _line, priority|
      score, parts = logmar(varr)
      next if score.nil?
      if best.nil? or (best[0] <= priority and score < best[1])
        best = [priority, score.round(4), parts]
      end
    end
    best || [nil, nil, [nil, nil, nil, nil]]
  end
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vaextractor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aaron Y. Lee MD MSCI
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-10-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: textoken
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.1.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.1.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 5.9.1
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 5.9.1
|
41
|
+
description: |
|
42
|
+
vaextractor uses rule-based NLP strategy to extract Snellen visual acuities
|
43
|
+
from unstructured ophthalmology clinical notes.
|
44
|
+
email: aaronylee@gmail.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/vaextractor.rb
|
50
|
+
homepage: http://github.org/ayl/vaextractor
|
51
|
+
licenses:
|
52
|
+
- GNU GPLv3
|
53
|
+
metadata: {}
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
requirements: []
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 2.4.8
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Rule based NLP library to extract visual acuities
|
74
|
+
test_files: []
|