rbbt-text 0.2.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/get_ppis.rb +52 -0
- data/lib/rbbt/bow/dictionary.rb +9 -9
- data/lib/rbbt/bow/misc.rb +86 -2
- data/lib/rbbt/corpus/corpus.rb +55 -0
- data/lib/rbbt/corpus/document.rb +289 -0
- data/lib/rbbt/corpus/document_repo.rb +115 -0
- data/lib/rbbt/corpus/sources/pubmed.rb +26 -0
- data/lib/rbbt/ner/NER.rb +7 -5
- data/lib/rbbt/ner/abner.rb +13 -2
- data/lib/rbbt/ner/annotations.rb +182 -51
- data/lib/rbbt/ner/annotations/annotated.rb +15 -0
- data/lib/rbbt/ner/annotations/named_entity.rb +37 -0
- data/lib/rbbt/ner/annotations/relations.rb +25 -0
- data/lib/rbbt/ner/annotations/token.rb +28 -0
- data/lib/rbbt/ner/annotations/transformed.rb +170 -0
- data/lib/rbbt/ner/banner.rb +8 -5
- data/lib/rbbt/ner/chemical_tagger.rb +34 -0
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +136 -0
- data/lib/rbbt/ner/oscar3.rb +1 -1
- data/lib/rbbt/ner/oscar4.rb +41 -0
- data/lib/rbbt/ner/patterns.rb +132 -0
- data/lib/rbbt/ner/rnorm.rb +141 -0
- data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
- data/lib/rbbt/ner/rnorm/tokens.rb +218 -0
- data/lib/rbbt/ner/token_trieNER.rb +185 -51
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +214 -0
- data/lib/rbbt/nlp/nlp.rb +235 -0
- data/share/install/software/ABNER +0 -4
- data/share/install/software/ChemicalTagger +81 -0
- data/share/install/software/Gdep +115 -0
- data/share/install/software/Geniass +118 -0
- data/share/install/software/OSCAR4 +16 -0
- data/share/install/software/StanfordParser +15 -0
- data/share/patterns/drug_induce_disease +22 -0
- data/share/rnorm/cue_default +10 -0
- data/share/rnorm/tokens_default +86 -0
- data/share/{stopwords → wordlists/stopwords} +0 -0
- data/test/rbbt/bow/test_bow.rb +1 -1
- data/test/rbbt/bow/test_dictionary.rb +1 -1
- data/test/rbbt/bow/test_misc.rb +1 -1
- data/test/rbbt/corpus/test_corpus.rb +99 -0
- data/test/rbbt/corpus/test_document.rb +222 -0
- data/test/rbbt/ner/annotations/test_named_entity.rb +14 -0
- data/test/rbbt/ner/annotations/test_transformed.rb +175 -0
- data/test/rbbt/ner/test_abner.rb +1 -1
- data/test/rbbt/ner/test_annotations.rb +64 -2
- data/test/rbbt/ner/test_banner.rb +1 -1
- data/test/rbbt/ner/test_chemical_tagger.rb +56 -0
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +20 -0
- data/test/rbbt/ner/{test_oscar3.rb → test_oscar4.rb} +12 -13
- data/test/rbbt/ner/test_patterns.rb +66 -0
- data/test/rbbt/ner/test_regexpNER.rb +1 -1
- data/test/rbbt/ner/test_rnorm.rb +47 -0
- data/test/rbbt/ner/test_token_trieNER.rb +60 -35
- data/test/rbbt/nlp/test_nlp.rb +88 -0
- data/test/test_helper.rb +20 -0
- metadata +93 -20
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/annotations'
|
3
|
+
require 'rbbt/ner/annotations/named_entity'
|
4
|
+
require 'rbbt/ner/annotations/transformed'
|
5
|
+
|
6
|
+
class TestClass < Test::Unit::TestCase
|
7
|
+
def test_info
|
8
|
+
a = "test"
|
9
|
+
a.extend NamedEntity
|
10
|
+
assert(! a.info.keys.include?("offset"))
|
11
|
+
a.offset = 10
|
12
|
+
assert a.info.keys.include? "offset"
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/annotations/transformed'
|
3
|
+
require 'rbbt/ner/annotations/named_entity'
|
4
|
+
|
5
|
+
class TestClass < Test::Unit::TestCase
|
6
|
+
def test_transform
|
7
|
+
a = "This sentence mentions the TP53 gene and the CDK5 protein"
|
8
|
+
original = a.dup
|
9
|
+
|
10
|
+
gene1 = "TP53"
|
11
|
+
gene1.extend NamedEntity
|
12
|
+
gene1.offset = a.index gene1
|
13
|
+
|
14
|
+
gene2 = "CDK5"
|
15
|
+
gene2.extend NamedEntity
|
16
|
+
gene2.offset = a.index gene2
|
17
|
+
|
18
|
+
assert_equal gene1, a[gene1.range]
|
19
|
+
assert_equal gene2, a[gene2.range]
|
20
|
+
|
21
|
+
c = a.dup
|
22
|
+
|
23
|
+
c[gene2.range] = "GN"
|
24
|
+
assert_equal c, Transformed.transform(a,[gene2], "GN")
|
25
|
+
c[gene1.range] = "GN"
|
26
|
+
assert_equal c, Transformed.transform(a,[gene1], "GN")
|
27
|
+
|
28
|
+
assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
|
29
|
+
assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
|
30
|
+
|
31
|
+
|
32
|
+
gene3 = "GN gene"
|
33
|
+
gene3.extend NamedEntity
|
34
|
+
gene3.offset = a.index gene3
|
35
|
+
|
36
|
+
assert_equal gene3, a[gene3.range]
|
37
|
+
|
38
|
+
a.restore([gene3])
|
39
|
+
assert_equal original, a
|
40
|
+
assert_equal "TP53 gene", a[gene3.range]
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_with_transform
|
45
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
46
|
+
original = a.dup
|
47
|
+
|
48
|
+
gene1 = "TP53"
|
49
|
+
gene1.extend NamedEntity
|
50
|
+
gene1.offset = a.index gene1
|
51
|
+
|
52
|
+
gene2 = "CDK5R1"
|
53
|
+
gene2.extend NamedEntity
|
54
|
+
gene2.offset = a.index gene2
|
55
|
+
|
56
|
+
Transformed.with_transform(a, [gene1], "GN") do
|
57
|
+
assert_equal original.sub("TP53", 'GN'), a
|
58
|
+
end
|
59
|
+
assert_equal original, a
|
60
|
+
|
61
|
+
Transformed.with_transform(a, [gene1,gene2], "GN") do
|
62
|
+
assert_equal original.gsub(/TP53|CDK5R1/, 'GN'), a
|
63
|
+
end
|
64
|
+
assert_equal original, a
|
65
|
+
|
66
|
+
Transformed.with_transform(a, [gene1], "GN") do
|
67
|
+
Transformed.with_transform(a, [gene2], "GN") do
|
68
|
+
assert_equal original.gsub(/TP53|CDK5R1/, 'GN'), a
|
69
|
+
end
|
70
|
+
assert_equal original.gsub(/TP53/, 'GN'), a
|
71
|
+
end
|
72
|
+
assert_equal original, a
|
73
|
+
|
74
|
+
exp1, exp2 = nil, nil
|
75
|
+
expanded_genes = Transformed.with_transform(a, [gene1,gene2], "GN") do
|
76
|
+
exp1 = "GN gene"
|
77
|
+
exp1.extend NamedEntity
|
78
|
+
exp1.offset = a.index exp1
|
79
|
+
exp2 = "GN protein"
|
80
|
+
exp2.extend NamedEntity
|
81
|
+
exp2.offset = a.index exp2
|
82
|
+
|
83
|
+
[exp1, exp2]
|
84
|
+
end
|
85
|
+
assert_equal original, a
|
86
|
+
|
87
|
+
assert_equal "TP53 gene", exp1
|
88
|
+
assert_equal "CDK5R1 protein", exp2
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_html
|
92
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
93
|
+
|
94
|
+
gene1 = "TP53"
|
95
|
+
gene1.extend NamedEntity
|
96
|
+
gene1.offset = a.index gene1
|
97
|
+
gene1.type = "Gene"
|
98
|
+
|
99
|
+
gene2 = "CDK5R1"
|
100
|
+
gene2.extend NamedEntity
|
101
|
+
gene2.offset = a.index gene2
|
102
|
+
gene2.type = "Protein"
|
103
|
+
|
104
|
+
Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
|
105
|
+
assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_html_with_offset
|
110
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
111
|
+
Segment.annotate(a, 10)
|
112
|
+
|
113
|
+
gene1 = "TP53"
|
114
|
+
gene1.extend NamedEntity
|
115
|
+
gene1.offset = a.index gene1
|
116
|
+
gene1.offset += 10
|
117
|
+
gene1.type = "Gene"
|
118
|
+
|
119
|
+
gene2 = "CDK5R1"
|
120
|
+
gene2.extend NamedEntity
|
121
|
+
gene2.offset = a.index gene2
|
122
|
+
gene2.offset += 10
|
123
|
+
gene2.type = "Protein"
|
124
|
+
|
125
|
+
Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
|
126
|
+
assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def test_overlap
|
131
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
132
|
+
|
133
|
+
gene1 = "TP53"
|
134
|
+
gene1.extend NamedEntity
|
135
|
+
gene1.offset = a.index gene1
|
136
|
+
gene1.type = "Gene"
|
137
|
+
|
138
|
+
gene2 = "TP53 gene"
|
139
|
+
gene2.extend NamedEntity
|
140
|
+
gene2.offset = a.index gene2
|
141
|
+
gene2.type = "Expanded Gene"
|
142
|
+
|
143
|
+
assert_equal [gene1], Segment.overlaps(Segment.sort([gene1,gene2]))
|
144
|
+
|
145
|
+
Transformed.with_transform(a, [gene1], Proc.new{|e| e.html}) do
|
146
|
+
assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the CDK5R1 protein", a
|
147
|
+
Transformed.with_transform(a, [gene2], Proc.new{|e| e.html}) do
|
148
|
+
assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Expanded Gene'><span class='Entity' attr-entity-type='Gene'>TP53</span> gene</span> and the CDK5R1 protein", a
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_cascade_with_overlap_ignored
|
154
|
+
a = "This sentence mentions the HDL-C gene and the CDK5R1 protein"
|
155
|
+
|
156
|
+
gene1 = "HDL-C"
|
157
|
+
gene1.extend NamedEntity
|
158
|
+
gene1.offset = a.index gene1
|
159
|
+
gene1.type = "Gene"
|
160
|
+
|
161
|
+
gene2 = "-"
|
162
|
+
gene2.extend NamedEntity
|
163
|
+
gene2.offset = a.index gene2
|
164
|
+
gene2.type = "Dash"
|
165
|
+
|
166
|
+
Transformed.with_transform(a, [gene1], Proc.new{|e| e.html}) do
|
167
|
+
one = a.dup
|
168
|
+
Transformed.with_transform(a, [gene2], Proc.new{|e| e.html}) do
|
169
|
+
assert_equal one, a
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
data/test/rbbt/ner/test_abner.rb
CHANGED
@@ -1,8 +1,70 @@
|
|
1
1
|
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/annotations'
|
3
|
+
require 'rbbt/ner/annotations/named_entity'
|
4
|
+
require 'rbbt/ner/annotations/transformed'
|
2
5
|
|
3
6
|
class TestClass < Test::Unit::TestCase
|
4
|
-
def
|
5
|
-
|
7
|
+
def test_info
|
8
|
+
a = "test"
|
9
|
+
a.extend NamedEntity
|
10
|
+
a.type = "type"
|
11
|
+
assert a.info.keys.include? "type"
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_segment_type
|
15
|
+
a = "test"
|
16
|
+
a.extend NamedEntity
|
17
|
+
assert a.segment_types.include? "NamedEntity"
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_align
|
21
|
+
text =<<-EOF
|
22
|
+
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of early childhood poorly responding to therapy.
|
23
|
+
EOF
|
24
|
+
|
25
|
+
parts = text.split(/\W/)
|
26
|
+
Segment.align(text, parts)
|
27
|
+
|
28
|
+
assert_equal "Atypical teratoid/".length, parts.select{|s| s == "rhabdoid"}.first.offset
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_sort
|
32
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
33
|
+
|
34
|
+
gene1 = "TP53"
|
35
|
+
gene1.extend NamedEntity
|
36
|
+
gene1.offset = a.index gene1
|
37
|
+
gene1.type = "Gene"
|
38
|
+
|
39
|
+
gene2 = "CDK5R1"
|
40
|
+
gene2.extend NamedEntity
|
41
|
+
gene2.offset = a.index gene2
|
42
|
+
gene2.type = "Gene"
|
43
|
+
|
44
|
+
assert_equal [gene1,gene2], Segment.sort([gene2,gene1])
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_clean_sort
|
49
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
50
|
+
|
51
|
+
gene1 = "TP53"
|
52
|
+
gene1.extend NamedEntity
|
53
|
+
gene1.offset = a.index gene1
|
54
|
+
gene1.type = "Gene"
|
55
|
+
|
56
|
+
gene2 = "CDK5R1"
|
57
|
+
gene2.extend NamedEntity
|
58
|
+
gene2.offset = a.index gene2
|
59
|
+
gene2.type = "Gene"
|
60
|
+
|
61
|
+
gene3 = "TP53 gene"
|
62
|
+
gene3.extend NamedEntity
|
63
|
+
gene3.offset = a.index gene3
|
64
|
+
gene3.type = "Gene"
|
65
|
+
|
66
|
+
assert_equal [gene3,gene2], Segment.clean_sort([gene2,gene1,gene3])
|
67
|
+
|
6
68
|
end
|
7
69
|
end
|
8
70
|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/chemical_tagger'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
class TestChemicalTagger < Test::Unit::TestCase
|
7
|
+
|
8
|
+
def test_match
|
9
|
+
begin
|
10
|
+
ner = ChemicalTagger.new
|
11
|
+
str = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
12
|
+
mentions = ner.match(str, "CM", false)
|
13
|
+
|
14
|
+
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]
|
15
|
+
|
16
|
+
good_mentions.each{|mention|
|
17
|
+
assert(mentions.include? mention)
|
18
|
+
}
|
19
|
+
rescue
|
20
|
+
puts $!.message
|
21
|
+
puts $!.backtrace
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_ranges
|
26
|
+
begin
|
27
|
+
ner = ChemicalTagger.new
|
28
|
+
str =<<-EOF
|
29
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
30
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
31
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
32
|
+
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
33
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
34
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
35
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
36
|
+
|
37
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
38
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
39
|
+
EOF
|
40
|
+
|
41
|
+
mentions = ner.match(str, "CM", false)
|
42
|
+
|
43
|
+
str_original = str.dup
|
44
|
+
mentions.each do |mention|
|
45
|
+
str[mention.range] = mention
|
46
|
+
end
|
47
|
+
|
48
|
+
assert_equal str_original, str
|
49
|
+
|
50
|
+
rescue
|
51
|
+
puts $!.message
|
52
|
+
puts $!.backtrace
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/ngram_prefix_dictionary'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
|
5
|
+
class TestNGramPrefixDictionary < Test::Unit::TestCase
|
6
|
+
def test_match
|
7
|
+
lexicon =<<-EOF
|
8
|
+
C1;aa;AA;bb b
|
9
|
+
C2;11;22;3 3;bb
|
10
|
+
EOF
|
11
|
+
|
12
|
+
TmpFile.with_file(lexicon) do |file|
|
13
|
+
index = NGramPrefixDictionary.new(TSV.new(file, :flat, :sep => ';'), "test")
|
14
|
+
|
15
|
+
matches = index.match(' asdfa dsf asdf aa asdfasdf ')
|
16
|
+
assert matches.select{|m| m.code.include? 'C1'}.any?
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
@@ -1,18 +1,17 @@
|
|
1
|
-
require File.dirname(__FILE__)
|
2
|
-
require 'rbbt/ner/
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/oscar4'
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
|
-
class
|
7
|
-
|
6
|
+
class TestOSCAR4 < Test::Unit::TestCase
|
8
7
|
|
9
8
|
def test_match
|
10
9
|
begin
|
11
|
-
ner =
|
12
|
-
str = "Alternatively, rearrangement of O-(
|
10
|
+
ner = OSCAR4.new
|
11
|
+
str = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
13
12
|
|
14
13
|
mentions = ner.match(str, "CM", false)
|
15
|
-
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(
|
14
|
+
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]
|
16
15
|
|
17
16
|
good_mentions.each{|mention|
|
18
17
|
assert(mentions.include? mention)
|
@@ -25,18 +24,18 @@ class TestOSCAR3 < Test::Unit::TestCase
|
|
25
24
|
|
26
25
|
def test_ranges
|
27
26
|
begin
|
28
|
-
ner =
|
27
|
+
ner = OSCAR4.new
|
29
28
|
str =<<-EOF
|
30
29
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
31
30
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
32
31
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
33
32
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
34
|
-
This otherone talks about O-(
|
35
|
-
This otherone talks about O-(
|
36
|
-
This otherone talks about O-(
|
33
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
34
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
35
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
37
36
|
|
38
|
-
This otherone talks about O-(
|
39
|
-
This otherone talks about O-(
|
37
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
38
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
40
39
|
EOF
|
41
40
|
|
42
41
|
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/patterns'
|
3
|
+
|
4
|
+
class TestPatternRelExt < Test::Unit::TestCase
|
5
|
+
def test_simple_pattern
|
6
|
+
text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
|
7
|
+
|
8
|
+
gene1 = "TP53"
|
9
|
+
NamedEntity.annotate(gene1, text.index(gene1), "Gene")
|
10
|
+
|
11
|
+
gene2 = "CDK5"
|
12
|
+
NamedEntity.annotate(gene2, text.index(gene2), "Gene")
|
13
|
+
|
14
|
+
interaction = "interacts"
|
15
|
+
NamedEntity.annotate(interaction, text.index(interaction), "Interaction")
|
16
|
+
|
17
|
+
Annotated.annotate(text, [gene1, gene2, interaction])
|
18
|
+
|
19
|
+
assert_equal "TP53 interacts with CDK5", PatternRelExt.simple_pattern(text, "GENE INTERACTION with GENE").first
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_chunk_pattern
|
24
|
+
text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
|
25
|
+
|
26
|
+
gene1 = "TP53"
|
27
|
+
NamedEntity.annotate(gene1, text.index(gene1), "Gene")
|
28
|
+
|
29
|
+
gene2 = "CDK5"
|
30
|
+
NamedEntity.annotate(gene2, text.index(gene2), "Gene")
|
31
|
+
|
32
|
+
interaction = "interacts"
|
33
|
+
NamedEntity.annotate(interaction, text.index(interaction), "Interaction")
|
34
|
+
|
35
|
+
Annotated.annotate(text, {:entities => [gene1, gene2, interaction]})
|
36
|
+
|
37
|
+
assert_equal "TP53 found in cultivated cells interacts with CDK5",
|
38
|
+
PatternRelExt.new("NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]").match_sentences([text]).first.first
|
39
|
+
|
40
|
+
assert_equal "TP53 found in cultivated cells interacts with CDK5",
|
41
|
+
PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_chunk_pattern
|
45
|
+
text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
|
46
|
+
|
47
|
+
drug = "thiazolidinediones"
|
48
|
+
NamedEntity.annotate(drug, text.index(drug), "Chemical Mention")
|
49
|
+
|
50
|
+
disease = "colon cancer"
|
51
|
+
NamedEntity.annotate(disease, text.index(disease), "disease")
|
52
|
+
|
53
|
+
Annotated.annotate(text, {:entitites => [drug, disease]})
|
54
|
+
|
55
|
+
assert_equal "thiazolidinediones in patients with an increased risk of colon cancer",
|
56
|
+
PatternRelExt.new("NP[entity:Chemical Mention] NP[stem:risk] NP[entity:disease]").match_sentences([text]).first.first
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
|
61
|
+
def test_entities_with_spaces
|
62
|
+
PatternRelExt.new("NP[entity:Gene Name]").token_trie
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
end
|