rbbt-text 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/bow/dictionary.rb +1 -1
- data/lib/rbbt/bow/misc.rb +2 -2
- data/lib/rbbt/ner/NER.rb +22 -0
- data/lib/rbbt/ner/abner.rb +8 -4
- data/lib/rbbt/ner/annotations.rb +123 -0
- data/lib/rbbt/ner/banner.rb +6 -4
- data/lib/rbbt/ner/oscar3.rb +29 -13
- data/lib/rbbt/ner/regexpNER.rb +69 -45
- data/lib/rbbt/ner/token_trieNER.rb +168 -0
- data/test/rbbt/ner/test_NER.rb +10 -0
- data/test/rbbt/ner/test_abner.rb +2 -2
- data/test/rbbt/ner/test_annotations.rb +8 -0
- data/test/rbbt/ner/test_banner.rb +2 -2
- data/test/rbbt/ner/test_oscar3.rb +35 -2
- data/test/rbbt/ner/test_regexpNER.rb +83 -35
- data/test/rbbt/ner/test_token_trieNER.rb +112 -0
- metadata +15 -12
- data/lib/rbbt/ner/named_entity.rb +0 -11
- data/lib/rbbt/ner/tokenNER.rb +0 -237
- data/test/rbbt/ner/test_named_entity.rb +0 -16
- data/test/rbbt/ner/test_tokenNER.rb +0 -239
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/util/tsv'
|
3
|
+
require 'rbbt/ner/annotations'
|
4
|
+
require 'rbbt/ner/NER'
|
5
|
+
|
6
|
+
class TokenTrieNER < NER
|
7
|
+
# Normalizes a token for dictionary lookup.
#
# Tokens longer than three characters are lower-cased; tokens of three
# characters or fewer are returned as they are, so their case stays
# significant (presumably to keep short acronyms distinct -- confirm).
#
# token - the String to normalize.
#
# Returns the lower-cased copy, or +token+ itself when it is short.
def self.clean(token)
  token.length > 3 ? token.downcase : token
end
|
14
|
+
|
15
|
+
# Wraps a raw token for matching.
#
# token - the token text exactly as it appears in the source text.
# start - the offset handed to Token.annotate for this token.
#
# Returns whatever Token.annotate produces when given the normalized text
# (see clean), the offset and the untouched original token.
def self.prepare_token(token, start)
  normalized = clean(token)
  Token.annotate(normalized, start, token)
end
|
18
|
+
|
19
|
+
# Splits +text+ into tokens, remembering where each one starts.
#
# text     - the String to split.
# split_at - separator pattern. Whatever it matches is dropped, except for
#            the text captured by its first group, which is emitted as a
#            token of its own (with the default pattern: parentheses and
#            the punctuation characters of the character class).
#            The pattern has to consume at least one character; an empty
#            match never shortens the remaining text and the loop would
#            not terminate.
# start    - offset assigned to the first character of +text+.
#
# Returns an Array with the results of prepare_token, in text order.
def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
  tokens = []
  remaining = text
  offset = start

  until (matchdata = remaining.match(split_at)).nil?
    # Text in front of the separator is a regular token.
    chunk = matchdata.pre_match
    tokens << prepare_token(chunk, offset) unless chunk.empty?

    # A captured separator is kept as a token, located through the capture.
    if matchdata.captures.any? && !matchdata.captures.first.empty?
      tokens << prepare_token(matchdata.captures.first, offset + matchdata.begin(1))
    end

    # Continue after the separator, keeping offsets relative to the input.
    offset += matchdata.end(0)
    remaining = matchdata.post_match
  end

  # Whatever follows the last separator is the final token.
  tokens << prepare_token(remaining, offset) unless remaining.empty?

  tokens
end
|
32
|
+
|
33
|
+
#{{{ Process dictionary
|
34
|
+
|
35
|
+
# Payload stored at the :END node of the token trie: a dictionary code
# together with an optional type.
class Code
  attr_accessor :value, :type

  # value - the dictionary code.
  # type  - optional type for the code (nil when not given).
  def initialize(value, type = nil)
    @value, @type = value, type
  end

  # Renders the code as "type:value"; a nil type leaves the part before the
  # colon empty. merge compares codes through this representation.
  def to_s
    [type, value].join(":")
  end
end
|
46
|
+
|
47
|
+
# Builds the single-branch trie for one tokenized dictionary name.
#
# Each token becomes one level of nested Hash, and the innermost level
# holds the code under the :END key:
#
#   index_for_tokens(%w(a b), 'C1') #=> {'a' => {'b' => {:END => [Code('C1')]}}}
#
# tokens - Array of tokens of the name, in order. The array is only read;
#          earlier versions consumed it with shift, leaving the caller with
#          an empty array.
# code   - dictionary code to store at the end of the branch.
# type   - optional type stored with the code.
#
# Returns the nested Hash. For an empty +tokens+ it is just the :END leaf.
def self.index_for_tokens(tokens, code, type = nil)
  # Parentheses are required here: an unparenthesized command call inside
  # an array literal is a syntax error on Ruby 1.9 and later.
  leaf = {:END => [Code.new(code, type)]}

  # Wrap the leaf from the last token outwards.
  tokens.reverse.inject(leaf) { |subtree, token| {token => subtree} }
end
|
54
|
+
|
55
|
+
# Merges the trie +index2+ into the trie +index1+, in place.
#
# * Codes under :END are appended unless a code with the same to_s is
#   already stored at that node.
# * Keys present in both tries are merged recursively.
# * Keys only present in +index2+ are attached to +index1+ as they are, so
#   both tries share those sub-hashes afterwards.
#
# index1 - the Hash trie that receives the entries (modified).
# index2 - the Hash trie to read the entries from.
#
# Returns +index1+, the merged trie. Earlier versions returned +index2+ as
# a by-product of each, which has the same content only when +index1+
# started out empty.
def self.merge(index1, index2)
  index2.each do |key, subtree|
    if key == :END
      current = (index1[:END] ||= [])
      # The list of known codes does not change while filtering, so its
      # string forms are computed once rather than once per candidate.
      known = current.collect { |code| code.to_s }
      additions = subtree.reject { |code| known.include? code.to_s }
      index1[:END] = (current + additions).uniq
    elsif index1.include?(key)
      merge(index1[key], subtree)
    else
      index1[key] = subtree
    end
  end

  index1
end
|
69
|
+
|
70
|
+
# Turns a dictionary into a token trie.
#
# hash - maps each code to its names; the names may be nested Arrays and
#        are flattened before use.
# type - optional type recorded with every code.
#
# Names shorter than two characters, and names that produce no tokens, are
# left out.
#
# Returns the trie as nested Hashes (see index_for_tokens and merge).
def self.process(hash, type = nil)
  hash.inject({}) do |index, (code, names)|
    names.flatten.each do |name|
      next unless name.length >= 2

      tokens = tokenize(name)
      next if tokens.empty?

      merge(index, index_for_tokens(tokens, code, type))
    end

    index
  end
end
|
82
|
+
|
83
|
+
#{{{ Matching
|
84
|
+
|
85
|
+
# Looks for a dictionary entry starting at the first element of +tokens+.
#
# index         - the Hash trie to walk (see process).
# tokens        - Array of tokens. Tokens that take part in a match are
#                 removed from the front of the array; when nothing
#                 matches the array is left as it was.
# longest_match - when true, walk as deep as possible and fall back to the
#                 last node holding codes; when false, stop at the first
#                 node holding codes.
#
# Returns [codes, matched_tokens] where +codes+ is the :END list of the
# node reached, or nil when no entry starts at the first token.
def self.find(index, tokens, longest_match = true)
  return nil unless index.include? tokens.first

  head = tokens.shift
  next_index = index[head]
  terminal = next_index.include?(:END)

  # Accept right here when there is nothing more to read, or when the
  # caller asked for the shortest match.
  if terminal && (tokens.empty? || !longest_match)
    return [next_index[:END], [head]]
  end

  unless tokens.empty?
    # The flag has to travel down the recursion; without it, a
    # shortest-match search silently became a longest-match search as soon
    # as the first token had no codes of its own.
    matches = find(next_index, tokens, longest_match)
    if matches
      matches.last.unshift head
      return matches
    end

    # Nothing longer matched: settle for the entry ending at this token.
    return [next_index[:END], [head]] if terminal
  end

  # No entry goes through this token: give it back to the caller.
  tokens.unshift head
  nil
end
|
114
|
+
|
115
|
+
# Builds the entity for a run of matched tokens.
#
# The entity text is rebuilt from the original form of each token. Where
# the offsets show a gap between two tokens, the gap is filled with spaces,
# whatever characters were there in the source text.
#
# match_tokens - non-empty Array of tokens responding to offset and
#                original.
# type         - passed through to NamedEntity.annotate.
# codes        - passed through to NamedEntity.annotate.
#
# Returns the result of NamedEntity.annotate for the rebuilt text, located
# at the offset of the first token.
def self.make_match(match_tokens, type, codes)
  start = match_tokens.first.offset

  text = match_tokens.inject("") do |built, token|
    gap = token.offset - (start + built.length)
    built << " " * gap if gap > 0
    built << token.original
  end

  NamedEntity.annotate(text, start, type, codes)
end
|
125
|
+
|
126
|
+
attr_accessor :index, :longest_match, :type
|
127
|
+
# Builds the recognizer from one or more dictionary files.
#
# file    - a dictionary source, or an Array of them; each one is handed to
#           TSV.new.
# type    - optional type recorded with every code and kept in #type.
# options - Hash of options. :longest_match (default true) is taken out and
#           kept in #longest_match; the remaining options, with :flatten
#           defaulting to true, are passed to TSV.new.
def initialize(file, type = nil, options = {})
  options = Misc.add_defaults options, :flatten => true, :longest_match => true
  @longest_match = options.delete :longest_match

  # The class declares a type accessor; without this assignment it would
  # always read nil, whatever was passed in.
  @type = type

  @index = {}
  sources = Array === file ? file : [file]
  sources.each do |source|
    TokenTrieNER.merge(@index, TokenTrieNER.process(TSV.new(source, options), type))
  end
end
|
135
|
+
|
136
|
+
# Adds more dictionary entries to this recognizer's index.
#
# new  - what to add:
#        * a TokenTrieNER: its index is merged in;
#        * a Hash: taken as a ready-made trie;
#        * a TSV: processed into a trie first;
#        * a String: loaded with TSV.new (:flatten => true), then processed.
#        Anything else is ignored and nil is returned.
# type - optional type for the codes; only used for TSV and String input.
#
# Returns the result of TokenTrieNER.merge, or nil for unsupported input.
def merge(new, type = nil)
  case new
  when TokenTrieNER
    TokenTrieNER.merge(@index, new.index)
  when Hash
    TokenTrieNER.merge(@index, new)
  when TSV
    TokenTrieNER.merge(@index, TokenTrieNER.process(new, type))
  when String
    TokenTrieNER.merge(@index, TokenTrieNER.process(TSV.new(new, :flatten => true), type))
  end
end
|
148
|
+
|
149
|
+
# Finds the dictionary entries mentioned in +text+.
#
# The text is tokenized and scanned from left to right. At each position
# the index is searched, honouring #longest_match. Tokens covered by a
# match are consumed by the search; when nothing matches, one token is
# skipped and the scan goes on.
#
# text - the String to annotate.
#
# Returns an Array with one TokenTrieNER.make_match result per match, built
# from the matched tokens, the types of the codes and the values of the
# codes.
def match(text)
  tokens = TokenTrieNER.tokenize text
  matches = []

  while tokens.any?
    found = TokenTrieNER.find(@index, tokens, longest_match)

    unless found
      # No entry starts here: move on to the next token.
      tokens.shift
      next
    end

    codes, match_tokens = found
    types = codes.collect { |code| code.type }
    values = codes.collect { |code| code.value }
    matches << TokenTrieNER.make_match(match_tokens, types, values)
  end

  matches
end
|
166
|
+
|
167
|
+
end
|
168
|
+
|
data/test/rbbt/ner/test_abner.rb
CHANGED
@@ -4,11 +4,11 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class TestAbner < Test::Unit::TestCase
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_match
|
8
8
|
begin
|
9
9
|
ner = Abner.new
|
10
10
|
|
11
|
-
mentions = ner.
|
11
|
+
mentions = ner.match(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
|
12
12
|
["SHP-2", "SHIP", "Shc"].each{|mention|
|
13
13
|
assert(mentions.include? mention)
|
14
14
|
}
|
@@ -4,11 +4,11 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class TestBanner < Test::Unit::TestCase
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_match
|
8
8
|
begin
|
9
9
|
ner = Banner.new
|
10
10
|
|
11
|
-
mentions = ner.
|
11
|
+
mentions = ner.match(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
|
12
12
|
["SHP - 2", "SHIP", "Shc"].each{|mention|
|
13
13
|
assert(mentions.include? mention)
|
14
14
|
}
|
@@ -6,12 +6,12 @@ require 'test/unit'
|
|
6
6
|
class TestOSCAR3 < Test::Unit::TestCase
|
7
7
|
|
8
8
|
|
9
|
-
def
|
9
|
+
def test_match
|
10
10
|
begin
|
11
11
|
ner = OSCAR3.new
|
12
12
|
str = "Alternatively, rearrangement of O-(ω-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
13
13
|
|
14
|
-
mentions = ner.
|
14
|
+
mentions = ner.match(str, "CM", false)
|
15
15
|
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(ω-haloalkyl)esters"]
|
16
16
|
|
17
17
|
good_mentions.each{|mention|
|
@@ -22,4 +22,37 @@ class TestOSCAR3 < Test::Unit::TestCase
|
|
22
22
|
puts $!.backtrace
|
23
23
|
end
|
24
24
|
end
|
25
|
+
|
26
|
+
def test_ranges
|
27
|
+
begin
|
28
|
+
ner = OSCAR3.new
|
29
|
+
str =<<-EOF
|
30
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
31
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
32
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
33
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
34
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
35
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
36
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
37
|
+
|
38
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
39
|
+
This otherone talks about O-(ω-haloalkyl)esters.
|
40
|
+
EOF
|
41
|
+
|
42
|
+
|
43
|
+
mentions = ner.match(str, "CM", false)
|
44
|
+
|
45
|
+
str_original = str.dup
|
46
|
+
mentions.each do |mention|
|
47
|
+
str[mention.range] = mention
|
48
|
+
end
|
49
|
+
|
50
|
+
assert_equal str_original, str
|
51
|
+
|
52
|
+
rescue
|
53
|
+
puts $!.message
|
54
|
+
puts $!.backtrace
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
25
58
|
end
|
@@ -1,56 +1,104 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
-
require 'rbbt-util'
|
3
2
|
require 'rbbt/ner/regexpNER'
|
4
|
-
require 'rbbt/sources/polysearch'
|
5
|
-
require 'test/unit'
|
6
3
|
|
7
4
|
class TestRegExpNER < Test::Unit::TestCase
|
8
|
-
def
|
9
|
-
|
10
|
-
end
|
11
|
-
def _test_class
|
12
|
-
text = "a bc d e f g h i j k l m n o p q one two"
|
5
|
+
def test_match_regexp
|
6
|
+
sentence = "In this sentence I should find this and 'that'"
|
13
7
|
|
14
|
-
|
15
|
-
|
16
|
-
C2,bc,y,yy,yyy
|
17
|
-
C3,i,z,zz,zzz,m,one two
|
18
|
-
EOF
|
8
|
+
regexp = /this/
|
9
|
+
matches = RegExpNER.match_regexp(sentence, regexp)
|
19
10
|
|
20
|
-
|
21
|
-
|
11
|
+
assert_equal ["this", "this"], matches
|
12
|
+
assert_equal "In ".length, matches[0].offset
|
13
|
+
assert_equal "In this sentence I should find ".length, matches[1].offset
|
22
14
|
|
23
|
-
|
24
|
-
|
15
|
+
regexp_list = [/this/, /that/]
|
16
|
+
matches = RegExpNER.match_regexp_list(sentence, regexp_list)
|
25
17
|
|
26
|
-
|
27
|
-
assert_equal
|
18
|
+
assert_equal ["this", "this", "that"], matches
|
19
|
+
assert_equal "In ".length, matches[0].offset
|
20
|
+
assert_equal "In this sentence I should find ".length, matches[1].offset
|
28
21
|
|
22
|
+
regexp_hash = {:this => /this/, :that => /that/}
|
23
|
+
matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)
|
29
24
|
|
30
|
-
|
25
|
+
assert_equal ["this", "this", "that"].sort, matches.sort
|
26
|
+
assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
|
27
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
|
28
|
+
assert_equal :this, matches.select{|m| m.type == :this}[0].type
|
31
29
|
end
|
32
30
|
|
33
|
-
def
|
34
|
-
|
31
|
+
def test_define_regexps
|
32
|
+
sentence = "In this sentence I should find this and 'that'"
|
35
33
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
34
|
+
ner = RegExpNER.new
|
35
|
+
ner.define_regexp do
|
36
|
+
this /this/
|
37
|
+
that /that/
|
38
|
+
end
|
39
|
+
|
40
|
+
matches = ner.entities(sentence)
|
41
|
+
assert_equal ["this", "this", "that"].sort, matches.sort
|
42
|
+
assert_equal "In ".length, matches.select{|m| m.type == :this }[0].offset
|
43
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this }[1].offset
|
44
|
+
assert_equal :this, matches.select{|m| m.type == :this }[0].type
|
45
|
+
end
|
41
46
|
|
42
|
-
file = TmpFile.tmp_file
|
43
|
-
File.open(file, 'w'){|f| f.write lexicon}
|
44
47
|
|
45
|
-
|
46
|
-
|
48
|
+
def test_entities
|
49
|
+
sentence = "In this sentence I should find this and 'that'"
|
47
50
|
|
48
|
-
|
49
|
-
|
51
|
+
ner = RegExpNER.new({:this => /this/, :that => /that/})
|
52
|
+
matches = ner.entities(sentence)
|
53
|
+
assert_equal ["this", "this", "that"].sort, matches.sort
|
54
|
+
assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
|
55
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
|
56
|
+
assert_equal :this, matches.select{|m| m.type == :this}[0].type
|
50
57
|
|
58
|
+
Annotated.annotate(sentence)
|
59
|
+
ner_this = RegExpNER.new({:this => /this/})
|
60
|
+
ner_that = RegExpNER.new({:that => /that/})
|
61
|
+
sentence.annotations += ner_this.entities(sentence)
|
62
|
+
sentence.annotations += ner_that.entities(sentence)
|
63
|
+
matches = sentence.annotations
|
51
64
|
|
52
|
-
|
65
|
+
assert_equal ["this", "this", "that"].sort, matches.sort
|
66
|
+
assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
|
67
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
|
68
|
+
assert_equal :this, matches.select{|m| m.type == :this}[0].type
|
53
69
|
end
|
54
|
-
end
|
55
70
|
|
71
|
+
# A pattern with a capture group reports the captured text, not the whole
# match: /I (should)/ has to yield "should", located after "I ".
def test_entities_captures
  sentence = "In this sentence I should find this and 'that'"

  ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
  matches = ner.entities(sentence)
  assert_equal ["this", "this", "that", "should"].sort, matches.sort

  should_match = matches.select { |m| m.type == :should }.first
  assert_equal "In this sentence I ".length, should_match.offset
  assert_equal :should, should_match.type
end
|
56
80
|
|
81
|
+
def test_regexp_order
|
82
|
+
text =<<-EOF
|
83
|
+
* Human AUC 0-24h= 7591 ng.h/ml at 30 mg/day In mice, dietary administration of aripiprazole at doses of 1, 3, and 10 asdf mg/kg/day for 104 weeks was
|
84
|
+
associated with increased incidences of mammary tumors, namely adenocarcinomas
|
85
|
+
EOF
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
regexp = RegExpNER.new
|
90
|
+
regexp.define_regexp do
|
91
|
+
dosage /\d+\s*(?:[mnukg]{1,2}|mol)(?:\/[mnguk]{1,2})?(?:\/day|d|hour|h|minute|min|m)?/i
|
92
|
+
time /[\d\.]+\s+(?:minute|hour|day|week|mounth|year)s?/i
|
93
|
+
end
|
94
|
+
|
95
|
+
offsets = {
|
96
|
+
"7591 ng" => 21,
|
97
|
+
"30 mg/day" => 37,
|
98
|
+
"104 weeks" => 142,
|
99
|
+
}
|
100
|
+
regexp.match(text).each do |entity|
|
101
|
+
assert_equal offsets[entity], entity.offset
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/token_trieNER'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
|
5
|
+
# Tests for TokenTrieNER: tokenization, trie construction and merging,
# lookup in the trie, and matching against text.
class TestTokenTrieNER < Test::Unit::TestCase

  # Dictionary shared by the tests below: one entry per line, code first,
  # fields separated by ';'. "bb" belongs to C2 and is also the first token
  # of the C1 name "bb b".
  LEXICON =<<-EOF
C1;aa;AA;bb b
C2;11;22;3 3;bb
  EOF

  def test_tokenize
    assert_equal ['a' , 'b', ',', 'c'], TokenTrieNER.tokenize('a b, c')

    assert_equal 10, TokenTrieNER.tokenize('123456789 12345').last.offset
    assert_equal 0, TokenTrieNER.tokenize('123456789 12345').first.offset

    # The range of a token has to select that token in the original text.
    text = '123456789 12345'
    assert_equal '12345', text[TokenTrieNER.tokenize('123456789 12345').last.range]
  end

  def test_merge
    # Expected shape: {'a' => {'b' => {'c' => {:END => [code 'CODE']}}}}
    tokens = %w(a b c)
    assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.value

    # Also check the hash that was merged into, not only the return value.
    target = {}
    TokenTrieNER.merge(target, TokenTrieNER.index_for_tokens(%w(a b c), 'CODE'))
    assert_equal 'CODE', target['a']['b']['c'][:END].first.value
  end

  def test_process
    TmpFile.with_file(LEXICON) do |file|
      index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))

      # The top level holds the first token of every name.
      assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
      assert_equal [:END], index['aa'].keys

      # "bb" is a complete name and also a prefix of "bb b".
      assert(index['bb'].keys.include?('b'))
      assert(index['bb'].keys.include?(:END))
    end
  end

  def test_find
    TmpFile.with_file(LEXICON) do |file|
      index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))

      assert(TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).first.collect{|c| c.value}.include?('C1'))
      assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).last

      assert(TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), true).first.collect{|c| c.value}.include?('C1'))

      # Longest match prefers "bb b" (C1) ...
      assert(TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).first.collect{|c| c.value}.include?('C1'))
      assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).last

      # ... while shortest match stops at "bb" (C2).
      assert(TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).first.collect{|c| c.value}.include?('C2'))
      assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).last

      assert(TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf'), false).first.collect{|c| c.value}.include?('C2'))
    end
  end

  def test_match
    TmpFile.with_file(LEXICON) do |file|
      index = TokenTrieNER.new(file, nil, :sep => ';')

      assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
    end
  end

  # Not run by default (the name does not start with "test"): it needs the
  # Polysearch data files.
  def _test_polysearch_long_match
    begin
      require 'rbbt/sources/polysearch'
    rescue LoadError
      # A failing require raises LoadError, which a bare rescue (limited to
      # StandardError) does not catch.
      puts "Polysearch is not available. Some test have not ran."
      assert true
      return
    end

    sentence = "mammary and pituitary neoplasms as well as other drug-related mammary/reproductive tissue alterations in females were considered"

    index = TokenTrieNER.new Rbbt.find_datafile('organ')
    assert(index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063'))

    index = TokenTrieNER.new Rbbt.find_datafile('disease')
    assert(index.match(sentence).collect{|m| m.code}.flatten.include?('DID44386'))

    # NOTE(review): the second positional argument of TokenTrieNER.new is
    # +type+, so the organ file is used as the type here rather than being
    # loaded as a second dictionary -- confirm whether
    # [disease, organ] was intended.
    index = TokenTrieNER.new Rbbt.find_datafile('disease'), Rbbt.find_datafile('organ')
    assert(index.match(sentence).collect{|m| m.code}.flatten.include?('DID44386'))

    # After merging the diseases in, the longer disease entry is expected
    # to win over the organ entry.
    index = TokenTrieNER.new Rbbt.find_datafile('organ')
    assert(index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063'))
    index.merge Rbbt.find_datafile('disease')
    assert ! index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')
    assert(index.match(sentence).collect{|m| m.code}.flatten.include?('DID44386'))
  end

end
|
112
|
+
|