rbbt-text 0.2.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/get_ppis.rb +52 -0
- data/lib/rbbt/bow/dictionary.rb +9 -9
- data/lib/rbbt/bow/misc.rb +86 -2
- data/lib/rbbt/corpus/corpus.rb +55 -0
- data/lib/rbbt/corpus/document.rb +289 -0
- data/lib/rbbt/corpus/document_repo.rb +115 -0
- data/lib/rbbt/corpus/sources/pubmed.rb +26 -0
- data/lib/rbbt/ner/NER.rb +7 -5
- data/lib/rbbt/ner/abner.rb +13 -2
- data/lib/rbbt/ner/annotations.rb +182 -51
- data/lib/rbbt/ner/annotations/annotated.rb +15 -0
- data/lib/rbbt/ner/annotations/named_entity.rb +37 -0
- data/lib/rbbt/ner/annotations/relations.rb +25 -0
- data/lib/rbbt/ner/annotations/token.rb +28 -0
- data/lib/rbbt/ner/annotations/transformed.rb +170 -0
- data/lib/rbbt/ner/banner.rb +8 -5
- data/lib/rbbt/ner/chemical_tagger.rb +34 -0
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +136 -0
- data/lib/rbbt/ner/oscar3.rb +1 -1
- data/lib/rbbt/ner/oscar4.rb +41 -0
- data/lib/rbbt/ner/patterns.rb +132 -0
- data/lib/rbbt/ner/rnorm.rb +141 -0
- data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
- data/lib/rbbt/ner/rnorm/tokens.rb +218 -0
- data/lib/rbbt/ner/token_trieNER.rb +185 -51
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +214 -0
- data/lib/rbbt/nlp/nlp.rb +235 -0
- data/share/install/software/ABNER +0 -4
- data/share/install/software/ChemicalTagger +81 -0
- data/share/install/software/Gdep +115 -0
- data/share/install/software/Geniass +118 -0
- data/share/install/software/OSCAR4 +16 -0
- data/share/install/software/StanfordParser +15 -0
- data/share/patterns/drug_induce_disease +22 -0
- data/share/rnorm/cue_default +10 -0
- data/share/rnorm/tokens_default +86 -0
- data/share/{stopwords → wordlists/stopwords} +0 -0
- data/test/rbbt/bow/test_bow.rb +1 -1
- data/test/rbbt/bow/test_dictionary.rb +1 -1
- data/test/rbbt/bow/test_misc.rb +1 -1
- data/test/rbbt/corpus/test_corpus.rb +99 -0
- data/test/rbbt/corpus/test_document.rb +222 -0
- data/test/rbbt/ner/annotations/test_named_entity.rb +14 -0
- data/test/rbbt/ner/annotations/test_transformed.rb +175 -0
- data/test/rbbt/ner/test_abner.rb +1 -1
- data/test/rbbt/ner/test_annotations.rb +64 -2
- data/test/rbbt/ner/test_banner.rb +1 -1
- data/test/rbbt/ner/test_chemical_tagger.rb +56 -0
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +20 -0
- data/test/rbbt/ner/{test_oscar3.rb → test_oscar4.rb} +12 -13
- data/test/rbbt/ner/test_patterns.rb +66 -0
- data/test/rbbt/ner/test_regexpNER.rb +1 -1
- data/test/rbbt/ner/test_rnorm.rb +47 -0
- data/test/rbbt/ner/test_token_trieNER.rb +60 -35
- data/test/rbbt/nlp/test_nlp.rb +88 -0
- data/test/test_helper.rb +20 -0
- metadata +93 -20
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/annotations'
|
3
|
+
require 'rbbt/ner/annotations/named_entity'
|
4
|
+
require 'rbbt/ner/annotations/transformed'
|
5
|
+
|
6
|
+
# Checks which keys a NamedEntity exposes through #info.
class TestClass < Test::Unit::TestCase
  # "offset" must not be listed among the info keys before an offset has been
  # assigned, and must be listed once it has.
  def test_info
    entity = "test"
    entity.extend(NamedEntity)

    keys_before = entity.info.keys
    assert(!keys_before.include?("offset"))

    entity.offset = 10

    keys_after = entity.info.keys
    assert(keys_after.include?("offset"))
  end
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/annotations/transformed'
|
3
|
+
require 'rbbt/ner/annotations/named_entity'
|
4
|
+
|
5
|
+
# Tests for Transformed: replacing annotated segments of a text in place and
# restoring the original text afterwards.
class TestClass < Test::Unit::TestCase
  # Transforms two entities one after the other, checks the first offset
  # recorded in each entry of transformation_offset_differences, and checks
  # that #restore brings the text back and re-aligns a segment that was located
  # in the transformed text.
  def test_transform
    text = "This sentence mentions the TP53 gene and the CDK5 protein"
    original = text.dup

    gene1 = entity_in(text, "TP53")
    gene2 = entity_in(text, "CDK5")

    assert_equal gene1, text[gene1.range]
    assert_equal gene2, text[gene2.range]

    # Expected text is built by hand; the later entity is replaced first so
    # that the range of the earlier one still points at the right characters.
    expected = text.dup

    expected[gene2.range] = "GN"
    assert_equal expected, Transformed.transform(text, [gene2], "GN")
    expected[gene1.range] = "GN"
    assert_equal expected, Transformed.transform(text, [gene1], "GN")

    assert_equal gene2.offset, text.transformation_offset_differences.first.first.first
    assert_equal gene1.offset, text.transformation_offset_differences.last.first.first

    # This segment is located in the already transformed text
    gene3 = entity_in(text, "GN gene")

    assert_equal gene3, text[gene3.range]

    text.restore([gene3])
    assert_equal original, text
    assert_equal "TP53 gene", text[gene3.range]
  end

  # Inside a with_transform block the text carries the replacements; after the
  # block the original text is back. Also covers nested blocks and segments
  # created inside the block and returned from it.
  def test_with_transform
    text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
    original = text.dup

    gene1 = entity_in(text, "TP53")
    gene2 = entity_in(text, "CDK5R1")

    Transformed.with_transform(text, [gene1], "GN") do
      assert_equal original.sub("TP53", 'GN'), text
    end
    assert_equal original, text

    Transformed.with_transform(text, [gene1, gene2], "GN") do
      assert_equal original.gsub(/TP53|CDK5R1/, 'GN'), text
    end
    assert_equal original, text

    Transformed.with_transform(text, [gene1], "GN") do
      Transformed.with_transform(text, [gene2], "GN") do
        assert_equal original.gsub(/TP53|CDK5R1/, 'GN'), text
      end
      assert_equal original.gsub(/TP53/, 'GN'), text
    end
    assert_equal original, text

    exp1, exp2 = nil, nil
    Transformed.with_transform(text, [gene1, gene2], "GN") do
      exp1 = entity_in(text, "GN gene")
      exp2 = entity_in(text, "GN protein")

      # The block's value is kept as its last expression; the asserts below
      # expect these segments to read as the original text after the block.
      [exp1, exp2]
    end
    assert_equal original, text

    assert_equal "TP53 gene", exp1
    assert_equal "CDK5R1 protein", exp2
  end

  # A Proc can be given instead of a fixed replacement string; here each entity
  # is replaced by its own #html rendering.
  def test_html
    text = "This sentence mentions the TP53 gene and the CDK5R1 protein"

    gene1 = entity_in(text, "TP53", "Gene")
    gene2 = entity_in(text, "CDK5R1", "Protein")

    Transformed.with_transform(text, [gene1, gene2], html_renderer) do
      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", text
    end
  end

  # Same as test_html, but the text is annotated as a Segment with the value 10
  # and the entity offsets are shifted by the same amount.
  def test_html_with_offset
    text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
    Segment.annotate(text, 10)

    gene1 = entity_in(text, "TP53", "Gene", 10)
    gene2 = entity_in(text, "CDK5R1", "Protein", 10)

    Transformed.with_transform(text, [gene1, gene2], html_renderer) do
      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", text
    end
  end

  # An entity and a longer entity containing it: the nested transformation is
  # expected to wrap the markup produced by the outer one.
  def test_overlap
    text = "This sentence mentions the TP53 gene and the CDK5R1 protein"

    gene1 = entity_in(text, "TP53", "Gene")
    gene2 = entity_in(text, "TP53 gene", "Expanded Gene")

    assert_equal [gene1], Segment.overlaps(Segment.sort([gene1, gene2]))

    Transformed.with_transform(text, [gene1], html_renderer) do
      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the CDK5R1 protein", text
      Transformed.with_transform(text, [gene2], html_renderer) do
        assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Expanded Gene'><span class='Entity' attr-entity-type='Gene'>TP53</span> gene</span> and the CDK5R1 protein", text
      end
    end
  end

  # An entity that lies inside an already transformed one ("-" inside "HDL-C")
  # is expected to leave the text untouched in the nested transformation.
  def test_cascade_with_overlap_ignored
    text = "This sentence mentions the HDL-C gene and the CDK5R1 protein"

    gene1 = entity_in(text, "HDL-C", "Gene")
    gene2 = entity_in(text, "-", "Dash")

    Transformed.with_transform(text, [gene1], html_renderer) do
      after_first = text.dup
      Transformed.with_transform(text, [gene2], html_renderer) do
        assert_equal after_first, text
      end
    end
  end

  private

  # Turns +literal+ into a NamedEntity positioned at its first occurrence in
  # +sentence+, optionally shifting the offset by +shift+ and setting +type+.
  # Returns the annotated string.
  def entity_in(sentence, literal, type = nil, shift = 0)
    literal.extend NamedEntity
    literal.offset = sentence.index(literal)
    literal.offset += shift unless shift.zero?
    literal.type = type if type
    literal
  end

  # Replacement callback that renders an entity through its #html method.
  def html_renderer
    Proc.new { |entity| entity.html }
  end
end
|
175
|
+
|
data/test/rbbt/ner/test_abner.rb
CHANGED
@@ -1,8 +1,70 @@
|
|
1
1
|
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/annotations'
|
3
|
+
require 'rbbt/ner/annotations/named_entity'
|
4
|
+
require 'rbbt/ner/annotations/transformed'
|
2
5
|
|
3
6
|
# Tests for NamedEntity info/type reporting and for the Segment helpers
# align, sort and clean_sort.
class TestClass < Test::Unit::TestCase
  # Once a type is assigned, "type" must be listed among the info keys.
  def test_info
    entity = "test"
    entity.extend(NamedEntity)
    entity.type = "type"

    assert(entity.info.keys.include?("type"))
  end

  # A string extended with NamedEntity must list "NamedEntity" in segment_types.
  def test_segment_type
    entity = "test"
    entity.extend(NamedEntity)

    assert(entity.segment_types.include?("NamedEntity"))
  end

  # After Segment.align, the part "rhabdoid" must carry the offset at which it
  # appears in the text.
  def test_align
    text =<<-EOF
Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of early childhood poorly responding to therapy.
    EOF

    parts = text.split(/\W/)
    Segment.align(text, parts)

    expected_offset = "Atypical teratoid/".length
    rhabdoid = parts.select { |part| part == "rhabdoid" }.first
    assert_equal(expected_offset, rhabdoid.offset)
  end

  # Segment.sort is expected to return the entities in order of appearance.
  def test_sort
    sentence = "This sentence mentions the TP53 gene and the CDK5R1 protein"

    gene1 = "TP53"
    gene2 = "CDK5R1"
    [gene1, gene2].each do |gene|
      gene.extend(NamedEntity)
      gene.offset = sentence.index(gene)
      gene.type = "Gene"
    end

    assert_equal([gene1, gene2], Segment.sort([gene2, gene1]))
  end

  # Segment.clean_sort is expected to keep "TP53 gene" and drop the "TP53"
  # entity it contains, returning the remaining entities in order.
  def test_clean_sort
    sentence = "This sentence mentions the TP53 gene and the CDK5R1 protein"

    gene1 = "TP53"
    gene2 = "CDK5R1"
    gene3 = "TP53 gene"
    [gene1, gene2, gene3].each do |gene|
      gene.extend(NamedEntity)
      gene.offset = sentence.index(gene)
      gene.type = "Gene"
    end

    assert_equal([gene3, gene2], Segment.clean_sort([gene2, gene1, gene3]))
  end
end
|
8
70
|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/chemical_tagger'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
# Tests for the ChemicalTagger named entity recognizer.
#
# NOTE(review): both tests print and discard any StandardError raised in their
# body. Depending on which class the test framework uses for assertion
# failures, a failed assertion may be discarded as well -- confirm this is the
# intended best-effort behaviour.
class TestChemicalTagger < Test::Unit::TestCase

  # The two chemical names in the sentence must be among the reported mentions.
  def test_match
    tagger = ChemicalTagger.new
    sentence = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
    found = tagger.match(sentence, "CM", false)

    expected = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]

    expected.each do |mention|
      assert(found.include?(mention))
    end
  rescue => error
    puts error.message
    puts error.backtrace
  end

  # Writing every mention back over its own range must leave the text
  # unchanged, i.e. each range has to point at the mention's own characters.
  def test_ranges
    tagger = ChemicalTagger.new
    text =<<-EOF
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
This otherone talks about O-(w-haloalkyl)esters.
This otherone talks about O-(w-haloalkyl)esters.
This otherone talks about O-(w-haloalkyl)esters.

This otherone talks about O-(w-haloalkyl)esters.
This otherone talks about O-(w-haloalkyl)esters.
    EOF

    found = tagger.match(text, "CM", false)

    untouched = text.dup
    found.each { |mention| text[mention.range] = mention }

    assert_equal(untouched, text)
  rescue => error
    puts error.message
    puts error.backtrace
  end

end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/ngram_prefix_dictionary'
|
3
|
+
require 'rbbt/util/tmpfile'
|
4
|
+
|
5
|
+
# Tests for NGramPrefixDictionary, built here from a small ';'-separated lexicon.
class TestNGramPrefixDictionary < Test::Unit::TestCase
  # The term "aa" in the scanned text must yield at least one match whose code
  # includes 'C1'.
  def test_match
    lexicon =<<-EOF
C1;aa;AA;bb b
C2;11;22;3 3;bb
    EOF

    TmpFile.with_file(lexicon) do |file|
      tsv = TSV.new(file, :flat, :sep => ';')
      dictionary = NGramPrefixDictionary.new(tsv, "test")

      found = dictionary.match(' asdfa dsf asdf aa asdfasdf ')
      c1_matches = found.select { |match| match.code.include?('C1') }
      assert(c1_matches.any?)
    end
  end
end
|
20
|
+
|
@@ -1,18 +1,17 @@
|
|
1
|
-
require File.dirname(__FILE__)
|
2
|
-
require 'rbbt/ner/
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/oscar4'
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
|
-
class
|
7
|
-
|
6
|
+
class TestOSCAR4 < Test::Unit::TestCase
|
8
7
|
|
9
8
|
def test_match
|
10
9
|
begin
|
11
|
-
ner =
|
12
|
-
str = "Alternatively, rearrangement of O-(
|
10
|
+
ner = OSCAR4.new
|
11
|
+
str = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
|
13
12
|
|
14
13
|
mentions = ner.match(str, "CM", false)
|
15
|
-
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(
|
14
|
+
good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]
|
16
15
|
|
17
16
|
good_mentions.each{|mention|
|
18
17
|
assert(mentions.include? mention)
|
@@ -25,18 +24,18 @@ class TestOSCAR3 < Test::Unit::TestCase
|
|
25
24
|
|
26
25
|
def test_ranges
|
27
26
|
begin
|
28
|
-
ner =
|
27
|
+
ner = OSCAR4.new
|
29
28
|
str =<<-EOF
|
30
29
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
31
30
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
32
31
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
33
32
|
This sentence talks about 2-carboethoxy-N-hydroxypyridine-2-selone.
|
34
|
-
This otherone talks about O-(
|
35
|
-
This otherone talks about O-(
|
36
|
-
This otherone talks about O-(
|
33
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
34
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
35
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
37
36
|
|
38
|
-
This otherone talks about O-(
|
39
|
-
This otherone talks about O-(
|
37
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
38
|
+
This otherone talks about O-(w-haloalkyl)esters.
|
40
39
|
EOF
|
41
40
|
|
42
41
|
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
require 'rbbt/ner/patterns'
|
3
|
+
|
4
|
+
# Tests for PatternRelExt: matching relation patterns against sentences whose
# entities have been annotated beforehand.
class TestPatternRelExt < Test::Unit::TestCase
  # A flat pattern of entity types and literal words must return the matching
  # span of the text.
  def test_simple_pattern
    text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"

    gene1 = "TP53"
    NamedEntity.annotate(gene1, text.index(gene1), "Gene")

    gene2 = "CDK5"
    NamedEntity.annotate(gene2, text.index(gene2), "Gene")

    interaction = "interacts"
    NamedEntity.annotate(interaction, text.index(interaction), "Interaction")

    Annotated.annotate(text, [gene1, gene2, interaction])

    assert_equal "TP53 interacts with CDK5", PatternRelExt.simple_pattern(text, "GENE INTERACTION with GENE").first
  end

  # A chunk pattern (NP/VP with entity and stem constraints) must match across
  # the words between the two genes; the pattern may be given as a single
  # string or as an array of strings.
  def test_chunk_pattern
    text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"

    gene1 = "TP53"
    NamedEntity.annotate(gene1, text.index(gene1), "Gene")

    gene2 = "CDK5"
    NamedEntity.annotate(gene2, text.index(gene2), "Gene")

    interaction = "interacts"
    NamedEntity.annotate(interaction, text.index(interaction), "Interaction")

    Annotated.annotate(text, {:entities => [gene1, gene2, interaction]})

    assert_equal "TP53 found in cultivated cells interacts with CDK5",
      PatternRelExt.new("NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]").match_sentences([text]).first.first

    assert_equal "TP53 found in cultivated cells interacts with CDK5",
      PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
  end

  # Chunk pattern relating a chemical mention to a disease through the noun
  # "risk".
  #
  # This test used to be a second definition of test_chunk_pattern, which
  # replaced the one above so that the gene interaction case never ran. It
  # also passed its entities under the misspelled key :entitites.
  def test_chunk_pattern_drug_disease
    text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."

    drug = "thiazolidinediones"
    NamedEntity.annotate(drug, text.index(drug), "Chemical Mention")

    disease = "colon cancer"
    NamedEntity.annotate(disease, text.index(disease), "disease")

    Annotated.annotate(text, {:entities => [drug, disease]})

    assert_equal "thiazolidinediones in patients with an increased risk of colon cancer",
      PatternRelExt.new("NP[entity:Chemical Mention] NP[stem:risk] NP[entity:disease]").match_sentences([text]).first.first
  end

  # Smoke test: building the token trie for an entity type that contains a
  # space must not raise.
  def test_entities_with_spaces
    PatternRelExt.new("NP[entity:Gene Name]").token_trie
  end
end
|