vaextractor 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/vaextractor.rb +263 -0
- metadata +74 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 99733aa02a6893e211ea821ee43ad63b558b161a
|
4
|
+
data.tar.gz: 21f5b410828fe8e1e1f66dd72308f726ce4a54e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 30a3983c99e90003bcfffc1b85d98162402d3d298ec33af539cdf076d7fdfda744a7766c0bcdd3b09117a375d0da014f7dd76a9406ba0a2ecc72e1f959324f44
|
7
|
+
data.tar.gz: ebe76eb202d0ff2819f2a66d7f25006cc35d3bf6f21ae89dd9a76a7f178e71418194705cf2e1becb79b846d7507656d563314ddaef97cdeff1c029c290f8a2c2
|
data/lib/vaextractor.rb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright 2016 Aaron Y. Lee MD MSCI
|
3
|
+
# University of Washington, Seattle WA
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
#
|
18
|
+
#
|
19
|
+
require 'textoken'
|
20
|
+
|
21
|
+
# Rule-based extractor that pulls Snellen-style visual acuities (and the
# low-vision notations CF/HM/LP/NLP) out of free-text clinical notes, assigns
# each one to the right (OD) or left (OS) eye, and converts it to logMAR.
#
# Tokenisation is delegated to the `textoken` gem (`Textoken(str).tokens`).
class VAExtractor
  # Raised by #extract when an acuity is found but no laterality (right/left)
  # can be determined for it by any of the fallback strategies.
  class ErrorLateralityNotFound < StandardError; end

  # Matches either a fraction such as "20/40", "20/40-2", "3E/20", "E/30"
  # or a low-vision notation such as "CF at 3 ft", "HM", "LP", "NLP".
  # Capture groups (1-based): 1 leading delimiter, 2 chart/numerator,
  # 3 denominator, 4 sign, 5 letter count, 6 low-vision keyword,
  # 10 distance number, 11 distance unit.
  # The sign class is `[+-]`; the earlier `[+|-]` also accepted a literal
  # "|" as a sign, which swallowed pipe-delimited acuities.
  # NOTE(review): the leading delimiter and trailing `\s*` are consumed by the
  # match, so two acuities separated by a single space may not both be found.
  @@varegex = /(\s|^|~|:)(20|3E|E)\/\s*(\d+)\s*([+-])*\s*(\d)*|(HM|CF|LP|NLP)(\W+(@|at|x)*\s*((\d+)(\s*'|\s*"|\s*in|\s*ft|\s*feet)*|face)*|$)/

  # Snellen denominators used as interpolation steps for +/- modifiers.
  @@snellenlevels = [10,15,20,25,30,40,50,60,70,80,100,125,150,200,250,300,400,600,800].freeze

  # Upper-cased laterality words mapped to OD (right), OS (left), OU (both).
  @@validtokens = {"OD" => "OD", "RE" => "OD", "RIGHT" => "OD", "R" => "OD",
                   "L" => "OS", "OS" => "OS", "LE" => "OS", "LEFT" => "OS",
                   "BOTH" => "OU", "BE" => "OU", "OU" => "OU", "BILATERAL" => "OU"}.freeze

  # logMAR values this class assigns to the low-vision notations.
  LOW_VISION_LOGMAR = {"CF" => 2.0, "HM" => 2.4, "LP" => 2.7, "NLP" => 3.0}.freeze

  # Precomputes the logMAR value of every 20/x level in @@snellenlevels.
  def initialize
    @usedsnellen = {}
    @@snellenlevels.each do |level|
      @usedsnellen[level] = snellen_logmar(20.0, level)
    end
  end

  # Tokenises +s+ and pairs every token with its character offset in +s+.
  #
  # The search cursor moves past each located token, so repeated tokens get
  # distinct offsets. A token that cannot be located in the remaining text
  # is skipped instead of raising.
  #
  # @param s [String] a line (or block) of text
  # @return [Array<Array(String, Integer)>] [token, offset] pairs
  def aligntokens(s)
    cursor = 0
    Textoken(s).tokens.each_with_object([]) do |token, aligned|
      at = s.index(token, cursor)
      next if at.nil?
      aligned.push [token, at]
      cursor = at + token.size
    end
  end

  # Last-resort laterality guess: counts right-eye and left-eye words in the
  # whole text and returns whichever occurs most often. "Both eyes" words
  # are ignored.
  #
  # @param rawtext [String]
  # @return [String, nil] "OD", "OS", or nil when no such word occurs
  def runentirefreq(rawtext)
    scores = Hash.new(0)
    aligntokens(rawtext).each do |token, _offset|
      eye = @@validtokens[token.upcase]
      scores[eye] += 1 if eye && eye != "OU"
    end
    return nil if scores.empty?
    scores.max_by { |_eye, count| count }.first
  end

  # Converts one regex capture row (leading delimiter already removed) into
  # a logMAR value.
  #
  # For 20/x acuities a "+n" / "-n" modifier moves the value n fifths of the
  # way toward the next better / next worse level of @@snellenlevels
  # (n defaults to 1). At either end of the table there is no neighbour and
  # the value is left unadjusted. Denominators outside the table are
  # converted directly with log10(x / 20). The input array is not modified.
  #
  # @param va [Array] capture row: [chart, denominator, sign, letters,
  #   low-vision keyword, ..., distance (index 8), unit (index 9)]
  # @return [Array] [logmar, parts]; parts is [chart, denominator, sign,
  #   letters] for fractions, [keyword, distance, unit, nil] for low-vision
  #   notations; [nil, nil] when nothing is recognised or the denominator is
  #   not positive
  def logmar(va)
    chart = va[0]
    if chart == "20"
      denom = va[1].to_i
      return [nil, nil] if denom <= 0
      parts = va[0...4]
      value = @usedsnellen[denom] || snellen_logmar(20.0, denom)
      sign = va[2]
      if sign == "+" || sign == "-"
        parts[3] ||= "1"
        neighbour = if sign == "+"
                      @@snellenlevels.select { |level| level < denom }.max
                    else
                      @@snellenlevels.select { |level| level > denom }.min
                    end
        if neighbour
          value = 1.0 * parts[3].to_i * (@usedsnellen[neighbour] - value) / 5.0 + value
        end
      end
      return [value, parts]
    elsif chart == "3E" || chart == "3" || chart == "E"
      denom = va[1].to_i
      return [nil, nil] if denom <= 0
      return [snellen_logmar(3.0, denom), va[0...4]]
    end

    fixed = LOW_VISION_LOGMAR[va[4]]
    return [fixed, [va[4], va[8], va[9], nil]] if fixed
    [nil, nil]
  end

  # Looks through +lines+ in the given order for the first right-eye or
  # left-eye word. Stops at the first blank line; "both eyes" words are
  # skipped.
  #
  # @param lines [Array<String>]
  # @return [String, nil] "OD", "OS", or nil
  def searchpriorlines(lines)
    lines.each do |line|
      return nil if line.strip == ""
      Textoken(line).tokens.each do |token|
        eye = @@validtokens[token.upcase]
        return eye if eye && eye != "OU"
      end
    end
    nil
  end

  # Chooses the laterality word on the same line that most plausibly belongs
  # to the acuity starting at offset +pos+.
  #
  # Each candidate is scored by the separators between it and +pos+
  # (sentence punctuation costs 10; a comma or "and" costs 5). The lowest
  # score wins and, within that score, the candidate closest to +pos+.
  #
  # @param pos [Integer] character offset of the acuity match
  # @param tokens [Array<Array(String, Integer)>] output of #aligntokens
  # @param linestr [String] the line itself (currently unused)
  # @return [Array, nil] [laterality, candidates grouped by score], or nil
  #   when the line contains no laterality word
  def findlaterality(pos, tokens, linestr)
    walls = {"." => 10, "!" => 10, "?" => 10, "," => 5, "and" => 5}
    token_at = {}
    tokens.each { |token, offset| token_at[offset] = token }

    answers = {}
    tokens.each do |token, offset|
      eye = @@validtokens[token.upcase]
      next unless eye
      left, right = [offset, pos].minmax
      score = token_at.inject(0) do |total, (at, between)|
        at >= left && at < right ? total + walls.fetch(between, 0) : total
      end
      (answers[score] ||= []).push [eye, (offset - pos).abs]
    end
    return nil if answers.empty?

    candidates = answers.min_by { |score, _list| score }.last
    return candidates.min_by { |_eye, distance| distance }.first, answers
  end

  # Extracts the best visual acuity for each eye from a clinical note.
  #
  # Lines starting with IOP / Ta / Tp (pressure readings) are ignored.
  # Laterality for each acuity is resolved, in order, by:
  # 1. a laterality word on the same line (priority 5);
  # 2. a laterality word in up to three preceding lines (priority 3);
  # 3. layout: acuities on this line and the next are taken as OD then OS,
  #    or exactly two acuities on one line as OD, OS (priority 0);
  # 4. the most frequent laterality word in the whole note (priority 0).
  # Steps 2-4 are only attempted while no acuity has been recorded yet.
  #
  # @param rawtext [String] the note
  # @return [Hash] :RE / :LE hold the matched parts (see #logmar) and
  #   :RElogmar / :LElogmar the logMAR rounded to 4 places. All four are nil
  #   when no acuity is found; an eye without an acuity gets
  #   [nil, nil, nil, nil] and a nil logMAR when the other eye has one.
  # @raise [ErrorLateralityNotFound] when an acuity is found but none of the
  #   strategies can assign it to an eye
  def extract(rawtext)
    lines = rawtext.split("\n")
    vas = {"OD" => [], "OS" => []}
    alreadychecked = {}

    lines.each_with_index do |line, i|
      stripped = line.strip
      next if stripped =~ /^IOP/ || stripped =~ /^Ta\s/ || stripped =~ /^Tp/i
      next if alreadychecked.has_key?(i)

      matches = scan_acuities(line)
      next if matches.empty?
      here = matches.map(&:first)
      tokens = aligntokens(line)

      matches.each do |val, pos|
        # A trailing digit of 5 or more is not a plausible letter count.
        next if val[3].to_i >= 5
        lat, _answers = findlaterality(pos, tokens, line)

        if lat == "OU"
          vas["OD"].push [val, i, 5]
          vas["OS"].push [val, i, 5]
        elsif lat
          vas[lat].push [val, i, 5]
        elsif vas["OD"].empty? && vas["OS"].empty?
          # Clamp at the top of the note so a negative start index cannot
          # wrap around to its last lines.
          lat = searchpriorlines(lines[[i - 3, 0].max...i].reverse)
          if lat
            vas[lat].push [val, i, 3]
            next
          end

          # most likely the VAs are either two in one line OD/OS or on two consecutive lines
          below = scan_acuities(lines[i + 1] || "").map(&:first)
          if !below.empty?
            here.each { |row| vas["OD"].push [row, i, 0] unless row[3].to_i >= 5 }
            below.each { |row| vas["OS"].push [row, i + 1, 0] unless row[3].to_i >= 5 }
            alreadychecked[i + 1] = 1
          elsif here.count == 2
            next if here.any? { |row| row[3].to_i >= 5 }
            vas["OD"].push [here[0], i, 0]
            vas["OS"].push [here[1], i, 0]
          else
            # worst case scenario, count up all the occurrences of r/l and then take the highest occurring freq
            lat = runentirefreq(rawtext)
            if lat.nil?
              raise ErrorLateralityNotFound, "laterality not found for visual acuity on line #{i + 1}"
            end
            vas[lat].push [val, i, 0]
          end
        end
      end
    end

    if vas["OD"].empty? && vas["OS"].empty?
      return {:RE => nil, :LE => nil, :RElogmar => nil, :LElogmar => nil}
    end

    od = best_acuity(vas["OD"])
    os = best_acuity(vas["OS"])
    {:RE => od[2], :LE => os[2], :RElogmar => od[1], :LElogmar => os[1]}
  end

  private

  # logMAR of the fraction numerator/denom, i.e. log10(denom / numerator).
  def snellen_logmar(numerator, denom)
    -Math.log(numerator / denom) / Math.log(10.0)
  end

  # Returns every acuity match in +line+ as [captures, offset], where
  # captures is the capture row without the leading-delimiter group and
  # offset is where the whole match starts.
  def scan_acuities(line)
    found = []
    offset = 0
    while (m = @@varegex.match(line, offset))
      found.push [m.captures.drop(1), m.begin(0)]
      # Always move forward, even if a match were ever empty.
      offset = m.end(0) > m.begin(0) ? m.end(0) : m.end(0) + 1
    end
    found
  end

  # Picks the best acuity for one eye from [captures, line, priority]
  # entries: the first convertible entry, then any later entry with an
  # equal or higher priority and a lower logMAR. Entries that cannot be
  # converted are skipped.
  # Returns [priority, rounded logMAR, parts], or
  # [nil, nil, [nil, nil, nil, nil]] when there is none.
  def best_acuity(entries)
    best = nil
    entries.each do |captures, _line, priority|
      value, parts = logmar(captures)
      next if value.nil?
      if best.nil? || (best[0] <= priority && value < best[1])
        best = [priority, value.round(4), parts]
      end
    end
    best || [nil, nil, [nil, nil, nil, nil]]
  end
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: vaextractor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aaron Y. Lee MD MSCI
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-10-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: textoken
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.1.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.1.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 5.9.1
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 5.9.1
|
41
|
+
description: |
|
42
|
+
vaextractor uses rule-based NLP strategy to extract Snellen visual acuities
|
43
|
+
from unstructured ophthalmology clinical notes.
|
44
|
+
email: aaronylee@gmail.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/vaextractor.rb
|
50
|
+
homepage: http://github.org/ayl/vaextractor
|
51
|
+
licenses:
|
52
|
+
- GNU GPLv3
|
53
|
+
metadata: {}
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
requirements: []
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 2.4.8
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Rule based NLP library to extract visual acuities
|
74
|
+
test_files: []
|