rbbt-text 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/bow/dictionary.rb +1 -1
- data/lib/rbbt/bow/misc.rb +2 -2
- data/lib/rbbt/ner/NER.rb +22 -0
- data/lib/rbbt/ner/abner.rb +8 -4
- data/lib/rbbt/ner/annotations.rb +123 -0
- data/lib/rbbt/ner/banner.rb +6 -4
- data/lib/rbbt/ner/oscar3.rb +29 -13
- data/lib/rbbt/ner/regexpNER.rb +69 -45
- data/lib/rbbt/ner/token_trieNER.rb +168 -0
- data/test/rbbt/ner/test_NER.rb +10 -0
- data/test/rbbt/ner/test_abner.rb +2 -2
- data/test/rbbt/ner/test_annotations.rb +8 -0
- data/test/rbbt/ner/test_banner.rb +2 -2
- data/test/rbbt/ner/test_oscar3.rb +35 -2
- data/test/rbbt/ner/test_regexpNER.rb +83 -35
- data/test/rbbt/ner/test_token_trieNER.rb +112 -0
- metadata +15 -12
- data/lib/rbbt/ner/named_entity.rb +0 -11
- data/lib/rbbt/ner/tokenNER.rb +0 -237
- data/test/rbbt/ner/test_named_entity.rb +0 -16
- data/test/rbbt/ner/test_tokenNER.rb +0 -239
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/util/tsv'
|
3
|
+
require 'rbbt/ner/annotations'
|
4
|
+
require 'rbbt/ner/NER'
|
5
|
+
|
6
|
+
class TokenTrieNER < NER
  # Normalize a token for matching: tokens longer than three characters are
  # matched case-insensitively (downcased); short tokens (acronyms, symbols)
  # are kept verbatim.
  def self.clean(token)
    if token.length > 3
      token.downcase
    else
      token
    end
  end

  # Build a Token annotation carrying the cleaned form, the absolute offset,
  # and the original (un-cleaned) token string.
  def self.prepare_token(token, start)
    Token.annotate(clean(token), start, token)
  end

  # Split +text+ into Token annotations. Whitespace is discarded, while the
  # punctuation characters captured by the first group of +split_at+ become
  # tokens of their own. +start+ is the offset of +text+ inside the larger
  # document so token offsets stay absolute.
  def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
    tokens = []
    while matchdata = text.match(split_at)
      tokens << prepare_token(matchdata.pre_match, start) unless matchdata.pre_match.empty?
      # Captured punctuation is kept as a token in its own right
      tokens << prepare_token(matchdata.captures.first, start + matchdata.begin(1)) if matchdata.captures.any? and not matchdata.captures.first.empty?
      start += matchdata.end(0)
      text = matchdata.post_match
    end
    tokens << prepare_token(text, start) unless text.empty?

    tokens
  end

  #{{{ Process dictionary

  # A dictionary code together with the entity type it belongs to.
  class Code
    attr_accessor :value, :type
    def initialize(value, type = nil)
      @value = value
      @type = type
    end

    def to_s
      [type, value] * ":"
    end
  end

  # Build a nested-hash trie branch for +tokens+, terminating in a
  # {:END => [Code]} leaf. NOTE: consumes +tokens+ destructively via shift.
  def self.index_for_tokens(tokens, code, type = nil)
    if tokens.empty?
      {:END => [Code.new(code, type)]}
    else
      {tokens.shift => index_for_tokens(tokens, code, type)}
    end
  end

  # Merge trie +index2+ into +index1+ in place. :END code lists are unioned
  # (deduplicated by Code#to_s); shared branches are merged recursively; new
  # branches are linked in directly.
  def self.merge(index1, index2)
    index2.each do |key, new_index2|
      case
      when key == :END
        index1[:END] ||= []
        index1[:END] += new_index2.reject{|new| index1[:END].collect{|e| e.to_s }.include? new.to_s }
        index1[:END].uniq!
      when index1.include?(key)
        merge(index1[key], new_index2)
      else
        index1[key] = new_index2
      end
    end
  end

  # Turn a {code => [names]} dictionary into a token trie. Empty names and
  # names shorter than two characters are ignored.
  def self.process(hash, type = nil)
    index = {}
    hash.each do |code, names|
      names.flatten.each do |name|
        next if name.empty? or name.length < 2
        tokens = tokenize name

        merge(index, index_for_tokens(tokens, code, type)) unless tokens.empty?
      end
    end
    index
  end

  #{{{ Matching

  # Try to match the head of +tokens+ against trie +index+. Returns
  # [codes, matched_tokens] on success, nil otherwise. On failure the
  # consumed tokens are pushed back so +tokens+ is left unchanged. With
  # +longest_match+ the deepest :END wins; otherwise the first :END found.
  def self.find(index, tokens, longest_match = true)
    return nil unless index.include? tokens.first

    head = tokens.shift
    next_index = index[head]

    if tokens.empty?
      if next_index.include? :END
        return [next_index[:END], [head]]
      else
        tokens.unshift head
        return nil
      end
    else

      return [next_index[:END], [head]] if next_index.include?(:END) and not longest_match

      # FIX: propagate longest_match into the recursion; it was previously
      # dropped, silently forcing longest-match behavior below this level.
      matches = find(next_index, tokens, longest_match)
      if not matches.nil?
        matches.last.unshift head
        return matches
      end

      return [next_index[:END], [head]] if next_index.include?(:END)

      tokens.unshift head
      return nil
    end
  end

  # Reassemble the matched tokens into a NamedEntity annotation, restoring
  # the gaps between non-adjacent tokens (from their offsets) with spaces.
  def self.make_match(match_tokens, type, codes)
    match = ""
    match_offset = match_tokens.first.offset
    match_tokens.each{|t|
      match << " " * (t.offset - (match_offset + match.length)) if t.offset > (match_offset + match.length)
      match << t.original
    }

    NamedEntity.annotate(match, match_tokens.first.offset, type, codes)
  end

  attr_accessor :index, :longest_match, :type

  # Build a trie NER from one or more TSV dictionary files (code => names).
  # Options: :longest_match (default true); remaining options go to TSV.new.
  def initialize(file, type = nil, options = {})
    options = Misc.add_defaults options, :flatten => true, :longest_match => true
    @longest_match = options.delete :longest_match

    file = [file] unless Array === file
    @index = {}
    file.each do |f| TokenTrieNER.merge(@index, TokenTrieNER.process(TSV.new(f, options), type)) end
  end

  # Merge another dictionary into the index. Accepts another TokenTrieNER,
  # a pre-built trie Hash, a TSV, or a filename String.
  def merge(new, type = nil)
    case
    when TokenTrieNER === new
      TokenTrieNER.merge(@index, new.index)
    when Hash === new
      TokenTrieNER.merge(@index, new)
    when TSV === new
      TokenTrieNER.merge(@index, TokenTrieNER.process(new, type))
    when String === new
      TokenTrieNER.merge(@index, TokenTrieNER.process(TSV.new(new, :flatten => true), type))
    end
  end

  # Scan +text+ and return all NamedEntity matches found in the trie.
  # Unmatched tokens are skipped one at a time.
  def match(text)
    tokens = TokenTrieNER.tokenize text

    matches = []
    while tokens.any?
      new_matches = TokenTrieNER.find(@index, tokens, longest_match)

      if new_matches
        codes, match_tokens = new_matches
        matches << TokenTrieNER.make_match(match_tokens, codes.collect{|c| c.type}, codes.collect{|c| c.value})
      else
        tokens.shift
      end
    end

    matches
  end

end
|
168
|
+
|
data/test/rbbt/ner/test_abner.rb
CHANGED
@@ -4,11 +4,11 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class TestAbner < Test::Unit::TestCase
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_match
|
8
8
|
begin
|
9
9
|
ner = Abner.new
|
10
10
|
|
11
|
-
mentions = ner.
|
11
|
+
mentions = ner.match(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
|
12
12
|
["SHP-2", "SHIP", "Shc"].each{|mention|
|
13
13
|
assert(mentions.include? mention)
|
14
14
|
}
|
@@ -4,11 +4,11 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class TestBanner < Test::Unit::TestCase
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_match
|
8
8
|
begin
|
9
9
|
ner = Banner.new
|
10
10
|
|
11
|
-
mentions = ner.
|
11
|
+
mentions = ner.match(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
|
12
12
|
["SHP - 2", "SHIP", "Shc"].each{|mention|
|
13
13
|
assert(mentions.include? mention)
|
14
14
|
}
|
@@ -6,12 +6,12 @@ require 'test/unit'
|
|
6
6
|
class TestOSCAR3 < Test::Unit::TestCase
|
7
7
|
|
8
8
|
|
9
|
-
def
|
9
|
+
def test_match
|
10
10
|
begin
|
11
11
|
ner = OSCAR3.new
|
12
12
|
str = "Alternatively, rearrangement of O-(ω-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
13
13
|
|
14
|
-
mentions = ner.
|
14
|
+
mentions = ner.match(str, "CM", false)
|
15
15
|
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(ω-haloalkyl)esters"]
|
16
16
|
|
17
17
|
good_mentions.each{|mention|
|
@@ -22,4 +22,37 @@ class TestOSCAR3 < Test::Unit::TestCase
|
|
22
22
|
puts $!.backtrace
|
23
23
|
end
|
24
24
|
end
|
25
|
+
|
26
|
+
def test_ranges
|
27
|
+
begin
|
28
|
+
ner = OSCAR3.new
|
29
|
+
str =<<-EOF
|
30
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
31
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
32
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
33
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
34
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
35
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
36
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
37
|
+
|
38
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
39
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
40
|
+
EOF
|
41
|
+
|
42
|
+
|
43
|
+
mentions = ner.match(str, "CM", false)
|
44
|
+
|
45
|
+
str_original = str.dup
|
46
|
+
mentions.each do |mention|
|
47
|
+
str[mention.range] = mention
|
48
|
+
end
|
49
|
+
|
50
|
+
assert_equal str_original, str
|
51
|
+
|
52
|
+
rescue
|
53
|
+
puts $!.message
|
54
|
+
puts $!.backtrace
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
25
58
|
end
|
@@ -1,56 +1,104 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt-util'
|
3
2
|
require 'rbbt/ner/regexpNER'
|
4
|
-
require 'rbbt/sources/polysearch'
|
5
|
-
require 'test/unit'
|
6
3
|
|
7
4
|
class TestRegExpNER < Test::Unit::TestCase
  # Class-level matching helpers: single regexp, list, and typed hash.
  def test_match_regexp
    sentence = "In this sentence I should find this and 'that'"

    regexp = /this/
    matches = RegExpNER.match_regexp(sentence, regexp)

    assert_equal ["this", "this"], matches
    assert_equal "In ".length, matches[0].offset
    assert_equal "In this sentence I should find ".length, matches[1].offset

    regexp_list = [/this/, /that/]
    matches = RegExpNER.match_regexp_list(sentence, regexp_list)

    assert_equal ["this", "this", "that"], matches
    assert_equal "In ".length, matches[0].offset
    assert_equal "In this sentence I should find ".length, matches[1].offset

    regexp_hash = {:this => /this/, :that => /that/}
    matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)

    assert_equal ["this", "this", "that"].sort, matches.sort
    assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
    assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
    assert_equal :this, matches.select{|m| m.type == :this}[0].type
  end

  # DSL-style regexp declaration through define_regexp.
  def test_define_regexps
    sentence = "In this sentence I should find this and 'that'"

    ner = RegExpNER.new
    ner.define_regexp do
      this /this/
      that /that/
    end

    matches = ner.entities(sentence)
    assert_equal ["this", "this", "that"].sort, matches.sort
    assert_equal "In ".length, matches.select{|m| m.type == :this }[0].offset
    assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this }[1].offset
    assert_equal :this, matches.select{|m| m.type == :this }[0].type
  end

  # Hash-initialized NER plus accumulating annotations on an Annotated string.
  def test_entities
    sentence = "In this sentence I should find this and 'that'"

    ner = RegExpNER.new({:this => /this/, :that => /that/})
    matches = ner.entities(sentence)
    assert_equal ["this", "this", "that"].sort, matches.sort
    assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
    assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
    assert_equal :this, matches.select{|m| m.type == :this}[0].type

    Annotated.annotate(sentence)
    ner_this = RegExpNER.new({:this => /this/})
    ner_that = RegExpNER.new({:that => /that/})
    sentence.annotations += ner_this.entities(sentence)
    sentence.annotations += ner_that.entities(sentence)
    matches = sentence.annotations

    assert_equal ["this", "this", "that"].sort, matches.sort
    assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
    assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
    assert_equal :this, matches.select{|m| m.type == :this}[0].type
  end

  # A capture group narrows the reported entity to the captured text.
  def test_entities_captures
    sentence = "In this sentence I should find this and 'that'"

    ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
    matches = ner.entities(sentence)
    assert_equal ["this", "this", "that", "should"].sort, matches.sort
    assert_equal "In this sentence I ".length, matches.select{|m| m.type == :should}[0].offset
    assert_equal :should, matches.select{|m| m.type == :should}[0].type
  end

  # Offsets must be correct even when several declared regexps overlap.
  def test_regexp_order
    text =<<-EOF
* Human AUC 0-24h= 7591 ng.h/ml at 30 mg/day In mice, dietary administration of aripiprazole at doses of 1, 3, and 10 asdf mg/kg/day for 104 weeks was
associated with increased incidences of mammary tumors, namely adenocarcinomas
    EOF

    regexp = RegExpNER.new
    regexp.define_regexp do
      dosage /\d+\s*(?:[mnukg]{1,2}|mol)(?:\/[mnguk]{1,2})?(?:\/day|d|hour|h|minute|min|m)?/i
      time /[\d\.]+\s+(?:minute|hour|day|week|mounth|year)s?/i
    end

    offsets = {
      "7591 ng" => 21,
      "30 mg/day" => 37,
      "104 weeks" => 142,
    }
    regexp.match(text).each do |entity|
      assert_equal offsets[entity], entity.offset
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/token_trieNER'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
|
5
|
+
class TestTokenTrieNER < Test::Unit::TestCase

  # Tokenization keeps punctuation as tokens and records absolute offsets.
  def test_tokenize
    assert_equal ['a', 'b', ',', 'c'], TokenTrieNER.tokenize('a b, c')

    assert_equal 10, TokenTrieNER.tokenize('123456789 12345').last.offset
    assert_equal 0, TokenTrieNER.tokenize('123456789 12345').first.offset

    text = '123456789 12345'
    assert_equal '12345', text[TokenTrieNER.tokenize('123456789 12345').last.range]
  end

  # Merging a single-branch index into an empty trie preserves the code leaf.
  def test_merge
    tokens = %w(a b c)
    index = {'a' => {'b' => {'c' => {:END => [TokenTrieNER::Code.new('CODE')]}}}}

    assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.value
  end

  # Processing a TSV lexicon builds a trie keyed by first tokens.
  def test_process
    lexicon =<<-EOF
C1;aa;AA;bb b
C2;11;22;3 3;bb
    EOF

    TmpFile.with_file(lexicon) do |file|

      index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))

      assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
      assert_equal [:END], index['aa'].keys
      assert index['bb'].keys.include?('b')
      assert index['bb'].keys.include?(:END)
    end
  end

  # find honors the longest_match flag when a prefix and a longer name overlap.
  def test_find
    lexicon =<<-EOF
C1;aa;AA;bb b
C2;11;22;3 3;bb
    EOF

    TmpFile.with_file(lexicon) do |file|
      index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))

      assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).first.collect{|c| c.value}.include?('C1')
      assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).last

      assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), true).first.collect{|c| c.value}.include?('C1')

      assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).first.collect{|c| c.value}.include?('C1')
      assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).last

      assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).first.collect{|c| c.value}.include?('C2')
      assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).last

      assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf'), false).first.collect{|c| c.value}.include?('C2')
    end
  end

  # End-to-end: build from a file and match a dictionary name inside noise.
  def test_match
    lexicon =<<-EOF
C1;aa;AA;bb b
C2;11;22;3 3;bb
    EOF

    TmpFile.with_file(lexicon) do |file|
      index = TokenTrieNER.new(file, nil, :sep => ';')

      assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
    end
  end

  # Disabled (leading underscore): requires the optional polysearch datafiles.
  def _test_polysearch_long_match
    begin
      require 'rbbt/sources/polysearch'
    rescue
      puts "Polysearch is not available. Some test have not ran."
      assert true
      return
    end

    sentence = "mammary and pituitary neoplasms as well as other drug-related mammary/reproductive tissue alterations in females were considered"

    index = TokenTrieNER.new Rbbt.find_datafile('organ')
    assert index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')

    index = TokenTrieNER.new Rbbt.find_datafile('disease')
    assert index.match(sentence).collect{|m| m.code}.flatten.include?('DID44386')

    index = TokenTrieNER.new Rbbt.find_datafile('disease'), Rbbt.find_datafile('organ')
    assert index.match(sentence).collect{|m| m.code}.flatten.include?('DID44386')

    index = TokenTrieNER.new Rbbt.find_datafile('disease'), Rbbt.find_datafile('organ')
    assert index.match(sentence).collect{|m| m.code}.flatten.include?('DID44386')

    index = TokenTrieNER.new Rbbt.find_datafile('organ')
    assert index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')
    index.merge Rbbt.find_datafile('disease')
    assert ! index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')
    assert index.match(sentence).collect{|m| m.code}.flatten.include?('DID44386')
  end

end
|
112
|
+
|