rbbt-text 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/document.rb +46 -0
- data/lib/rbbt/document/annotation.rb +42 -0
- data/lib/rbbt/document/corpus.rb +38 -0
- data/lib/rbbt/document/corpus/pubmed.rb +33 -0
- data/lib/rbbt/ner/NER.rb +3 -3
- data/lib/rbbt/ner/abner.rb +1 -1
- data/lib/rbbt/ner/banner.rb +1 -1
- data/lib/rbbt/ner/brat.rb +1 -1
- data/lib/rbbt/ner/chemical_tagger.rb +1 -2
- data/lib/rbbt/ner/g_norm_plus.rb +19 -2
- data/lib/rbbt/ner/linnaeus.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
- data/lib/rbbt/ner/oscar3.rb +1 -2
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +6 -5
- data/lib/rbbt/ner/regexpNER.rb +1 -2
- data/lib/rbbt/ner/token_trieNER.rb +6 -6
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +1 -1
- data/lib/rbbt/nlp/nlp.rb +5 -5
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
- data/lib/rbbt/segment.rb +177 -0
- data/lib/rbbt/segment/annotation.rb +58 -0
- data/lib/rbbt/segment/encoding.rb +18 -0
- data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -11
- data/lib/rbbt/segment/overlaps.rb +63 -0
- data/lib/rbbt/segment/range_index.rb +35 -0
- data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
- data/lib/rbbt/segment/token.rb +23 -0
- data/lib/rbbt/{text/segment → segment}/transformed.rb +7 -9
- data/lib/rbbt/segment/tsv.rb +41 -0
- data/share/install/software/Linnaeus +1 -1
- data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
- data/test/rbbt/document/test_annotation.rb +140 -0
- data/test/rbbt/document/test_corpus.rb +33 -0
- data/test/rbbt/ner/test_finder.rb +3 -3
- data/test/rbbt/ner/test_g_norm_plus.rb +11 -1
- data/test/rbbt/ner/test_patterns.rb +9 -9
- data/test/rbbt/ner/test_regexpNER.rb +14 -14
- data/test/rbbt/ner/test_rnorm.rb +3 -4
- data/test/rbbt/ner/test_token_trieNER.rb +1 -0
- data/test/rbbt/nlp/genia/test_sentence_splitter.rb +13 -3
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
- data/test/rbbt/segment/test_annotation.rb +40 -0
- data/test/rbbt/segment/test_corpus.rb +36 -0
- data/test/rbbt/segment/test_encoding.rb +24 -0
- data/test/rbbt/{text/segment → segment}/test_named_entity.rb +12 -9
- data/test/rbbt/segment/test_overlaps.rb +69 -0
- data/test/rbbt/segment/test_range_index.rb +43 -0
- data/test/rbbt/{text/segment → segment}/test_transformed.rb +76 -51
- data/test/rbbt/test_document.rb +14 -0
- data/test/rbbt/test_segment.rb +187 -0
- data/test/test_helper.rb +5 -3
- metadata +40 -32
- data/lib/rbbt/text/corpus.rb +0 -106
- data/lib/rbbt/text/corpus/document.rb +0 -383
- data/lib/rbbt/text/corpus/document_repo.rb +0 -68
- data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
- data/lib/rbbt/text/document.rb +0 -39
- data/lib/rbbt/text/segment.rb +0 -363
- data/lib/rbbt/text/segment/docid.rb +0 -46
- data/lib/rbbt/text/segment/relationship.rb +0 -24
- data/lib/rbbt/text/segment/token.rb +0 -49
- data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
- data/test/rbbt/text/corpus/test_document.rb +0 -82
- data/test/rbbt/text/segment/test_relationship.rb +0 -0
- data/test/rbbt/text/segment/test_segmented.rb +0 -23
- data/test/rbbt/text/test_corpus.rb +0 -34
- data/test/rbbt/text/test_document.rb +0 -58
- data/test/rbbt/text/test_segment.rb +0 -100
@@ -0,0 +1,35 @@
|
|
1
|
+
module Segment::RangeIndex
|
2
|
+
attr_accessor :corpus
|
3
|
+
|
4
|
+
def [](*args)
|
5
|
+
res = super(*args)
|
6
|
+
SegID.setup(res, :corpus => corpus)
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.index(segments, corpus, persist_file = :memory)
|
10
|
+
segments = segments.values.flatten if Hash === segments
|
11
|
+
|
12
|
+
annotation_index =
|
13
|
+
Persist.persist("Segment_index", :fwt, :persist => (! (persist_file.nil? or persist_file == :memory)), :file => persist_file) do
|
14
|
+
|
15
|
+
value_size = 0
|
16
|
+
index_data = segments.collect{|segment|
|
17
|
+
next if segment.offset.nil?
|
18
|
+
range = segment.range
|
19
|
+
value_size = [segment.segid.length, value_size].max
|
20
|
+
[segment.segid, [range.begin, range.end]]
|
21
|
+
}.compact
|
22
|
+
|
23
|
+
fwt = FixWidthTable.get :memory, value_size, true
|
24
|
+
fwt.add_range index_data
|
25
|
+
|
26
|
+
fwt
|
27
|
+
end
|
28
|
+
|
29
|
+
annotation_index.extend Segment::RangeIndex
|
30
|
+
annotation_index.corpus = corpus
|
31
|
+
annotation_index
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rbbt/segment'
|
2
|
+
|
3
|
+
module Token
|
4
|
+
extend Entity
|
5
|
+
include Segment
|
6
|
+
|
7
|
+
self.annotation :original
|
8
|
+
|
9
|
+
def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
|
10
|
+
|
11
|
+
tokens = []
|
12
|
+
while matchdata = text.match(split_at)
|
13
|
+
tokens << Token.setup(matchdata.pre_match, :offset => start) unless matchdata.pre_match.empty?
|
14
|
+
tokens << Token.setup(matchdata.captures.first, :offset => start + matchdata.begin(1)) if matchdata.captures.any? and not matchdata.captures.first.empty?
|
15
|
+
start += matchdata.end(0)
|
16
|
+
text = matchdata.post_match
|
17
|
+
end
|
18
|
+
|
19
|
+
tokens << Token.setup(text, :offset => start) unless text.empty?
|
20
|
+
|
21
|
+
tokens
|
22
|
+
end
|
23
|
+
end
|
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'rbbt/util/misc'
|
2
|
-
require 'rbbt/text/segment'
|
3
|
-
|
4
1
|
module Transformed
|
5
2
|
|
6
3
|
def self.transform(text, segments, replacement = nil, &block)
|
@@ -111,10 +108,10 @@ module Transformed
|
|
111
108
|
|
112
109
|
self[updated_begin..updated_end] = new
|
113
110
|
|
114
|
-
@transformed_segments[segment.
|
111
|
+
@transformed_segments[segment.object_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
|
115
112
|
|
116
113
|
segment.replace original_text
|
117
|
-
stack << segment.
|
114
|
+
stack << segment.object_id
|
118
115
|
end
|
119
116
|
@transformation_stack << stack
|
120
117
|
end
|
@@ -122,13 +119,13 @@ module Transformed
|
|
122
119
|
def fix_segment(segment, range, diff)
|
123
120
|
case
|
124
121
|
# Before
|
125
|
-
when segment.
|
122
|
+
when segment.eend < range.begin
|
126
123
|
# After
|
127
124
|
when segment.offset.to_i > range.end + diff
|
128
125
|
segment.offset = segment.offset.to_i - diff
|
129
126
|
# Includes
|
130
|
-
when (segment.offset.to_i <= range.begin and segment.
|
131
|
-
segment.replace self[segment.offset.to_i..segment.
|
127
|
+
when (segment.offset.to_i <= range.begin and segment.eend >= range.end + diff)
|
128
|
+
segment.replace self[segment.offset.to_i..segment.eend - diff]
|
132
129
|
else
|
133
130
|
raise "Segment Overlaps"
|
134
131
|
end
|
@@ -141,7 +138,8 @@ module Transformed
|
|
141
138
|
|
142
139
|
if first_only
|
143
140
|
@transformation_stack.pop.reverse.each do |id|
|
144
|
-
|
141
|
+
segment_info = @transformed_segments.delete id
|
142
|
+
orig_range, diff, text, range = segment_info
|
145
143
|
|
146
144
|
new_range = (range.begin..range.last + diff)
|
147
145
|
self[new_range] = text
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#module Segment
|
2
|
+
#
|
3
|
+
# def self.set_tsv_fields(fields, segments)
|
4
|
+
# tsv_fields = []
|
5
|
+
# add_types = ! (fields.delete(:no_types) || fields.delete("no_types") || fields.include?(:JSON) || fields.include?("JSON"))
|
6
|
+
# literal = (fields.delete(:literal) || fields.delete("literal"))
|
7
|
+
# tsv_fields << "Start" << "End"
|
8
|
+
# tsv_fields << :annotation_types if add_types
|
9
|
+
# tsv_fields << :literal if literal
|
10
|
+
#
|
11
|
+
# if fields.any? and not (fields == [:all] or fields == ["all"])
|
12
|
+
# tsv_fields.concat fields
|
13
|
+
# else
|
14
|
+
# tsv_fields.concat segments.first.annotations if segments.any?
|
15
|
+
# end
|
16
|
+
# tsv_fields
|
17
|
+
# tsv_fields.collect!{|f| f.to_s}
|
18
|
+
# tsv_fields.delete "offset"
|
19
|
+
# tsv_fields
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# def self.tsv(segments, *fields)
|
23
|
+
# fields = set_tsv_fields fields, segments
|
24
|
+
# tsv = TSV.setup({}, :key_field => "ID", :fields => fields, :type => :double)
|
25
|
+
#
|
26
|
+
# segments.each do |segment|
|
27
|
+
# tsv[segment.segment_id] = self.tsv_values_for_segment(segment, fields)
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# tsv
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# def self.load_tsv(tsv)
|
34
|
+
# fields = tsv.fields
|
35
|
+
# tsv.with_unnamed do
|
36
|
+
# tsv.collect do |id, values|
|
37
|
+
# Annotated.load_tsv_values(id, values, fields)
|
38
|
+
# end
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
#end
|
@@ -12,7 +12,7 @@ pkg_dir="`opt_dir \"$name\"`"
|
|
12
12
|
build_dir=`build_dir`
|
13
13
|
mv "$build_dir" "$pkg_dir"
|
14
14
|
tmp_file="~/.rbbt/tmp/species-proxy-properties.tmp"
|
15
|
-
mkdir -p $(
|
15
|
+
mkdir -p $(dirname "$tmp_file")
|
16
16
|
cat "$pkg_dir/species-proxy/properties.conf" |grep -v "^.dir =" >> $tmp_file
|
17
17
|
echo "\$dir = $pkg_dir/species-proxy/" > "$pkg_dir/species-proxy/properties.conf"
|
18
18
|
cat $tmp_file | grep -v "^#" >> "$pkg_dir/species-proxy/properties.conf"
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
require 'rbbt/document/corpus/pubmed'
|
5
|
+
|
6
|
+
class TestCorpusPubmed < Test::Unit::TestCase
|
7
|
+
def test_add_pmid
|
8
|
+
corpus = Document::Corpus.setup({})
|
9
|
+
|
10
|
+
document = corpus.add_pmid("32299157", :abstract).first
|
11
|
+
title = document.to(:title)
|
12
|
+
assert title.include?("COVID-19")
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
require 'rbbt/segment'
|
5
|
+
require 'rbbt/document/annotation'
|
6
|
+
require 'rbbt/segment/named_entity'
|
7
|
+
|
8
|
+
class TestAnnotation < Test::Unit::TestCase
|
9
|
+
class CalledOnce < Exception; end
|
10
|
+
def setup
|
11
|
+
Document.define :words do
|
12
|
+
self.split(" ")
|
13
|
+
end
|
14
|
+
|
15
|
+
$called_once = false
|
16
|
+
Document.define :persisted_words do
|
17
|
+
raise CalledOnce if $called_once
|
18
|
+
$called_once = true
|
19
|
+
self.split(" ")
|
20
|
+
end
|
21
|
+
|
22
|
+
Document.define_multiple :multiple_words do |list|
|
23
|
+
list.collect{|doc| doc.words}
|
24
|
+
end
|
25
|
+
|
26
|
+
Document.define :ner do
|
27
|
+
$called_once = true
|
28
|
+
self.split(" ").collect{|e| NamedEntity.setup(e, :code => Misc.digest(e)) }
|
29
|
+
end
|
30
|
+
|
31
|
+
Document.persist :ner
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_define
|
35
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
36
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
37
|
+
|
38
|
+
corpus = {}
|
39
|
+
corpus.extend Document::Corpus
|
40
|
+
|
41
|
+
corpus.add_document(text)
|
42
|
+
|
43
|
+
assert_equal text[text.words[1].range], text.words[1]
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_define_multiple
|
47
|
+
text1 = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
48
|
+
text2 = "This is another sentence"
|
49
|
+
Document.setup(text1, "TEST", "test_doc1", nil)
|
50
|
+
Document.setup(text2, "TEST", "test_doc2", nil)
|
51
|
+
|
52
|
+
corpus = {}
|
53
|
+
corpus.extend Document::Corpus
|
54
|
+
|
55
|
+
corpus.add_document(text1)
|
56
|
+
corpus.add_document(text2)
|
57
|
+
|
58
|
+
assert_equal 2, Document.setup([text1, text2]).multiple_words.length
|
59
|
+
assert_equal text1.split(" "), text1.multiple_words
|
60
|
+
|
61
|
+
#Document.persist :multiple_words, :annotations, :annotation_repo => Rbbt.tmp.test.multiple_words
|
62
|
+
#assert_equal 2, Document.setup([text1, text2]).multiple_words.length
|
63
|
+
#assert_equal text1.split(" "), text1.multiple_words
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_persist
|
67
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
68
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
69
|
+
|
70
|
+
corpus = {}
|
71
|
+
corpus.extend Document::Corpus
|
72
|
+
|
73
|
+
corpus.add_document(text)
|
74
|
+
|
75
|
+
assert_equal "persisted_words", text.persisted_words.first.type
|
76
|
+
|
77
|
+
assert_raise CalledOnce do
|
78
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
79
|
+
end
|
80
|
+
|
81
|
+
Log.severity = 0
|
82
|
+
Document.persist :persisted_words, :annotations, :file => Rbbt.tmp.test.persisted_words.find(:user)
|
83
|
+
|
84
|
+
$called_once = false
|
85
|
+
text.persisted_words
|
86
|
+
assert $called_once
|
87
|
+
|
88
|
+
assert_nothing_raised do
|
89
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_persist_annotation_repo
|
94
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
95
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
96
|
+
|
97
|
+
corpus = {}
|
98
|
+
corpus.extend Document::Corpus
|
99
|
+
|
100
|
+
corpus.add_document(text)
|
101
|
+
|
102
|
+
assert_equal "persisted_words", text.persisted_words.first.type
|
103
|
+
|
104
|
+
assert_raise CalledOnce do
|
105
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
106
|
+
end
|
107
|
+
|
108
|
+
Log.severity = 0
|
109
|
+
Document.persist :persisted_words, :annotations, :annotation_repo => Rbbt.tmp.test.persisted_words_repo.find(:user)
|
110
|
+
|
111
|
+
$called_once = false
|
112
|
+
text.persisted_words
|
113
|
+
assert $called_once
|
114
|
+
|
115
|
+
assert_nothing_raised do
|
116
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def test_persist_ner
|
121
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
122
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
123
|
+
|
124
|
+
corpus = {}
|
125
|
+
corpus.extend Document::Corpus
|
126
|
+
|
127
|
+
corpus.add_document(text)
|
128
|
+
|
129
|
+
|
130
|
+
text.ner
|
131
|
+
|
132
|
+
$called_once = false
|
133
|
+
text.ner
|
134
|
+
|
135
|
+
assert ! $called_once
|
136
|
+
|
137
|
+
assert text.ner.first.segid.include?("TEST:")
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
|
5
|
+
class TestDocumentCorpus < Test::Unit::TestCase
|
6
|
+
def test_corpus
|
7
|
+
text = "This is a document"
|
8
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
9
|
+
|
10
|
+
corpus = Document::Corpus.setup({})
|
11
|
+
|
12
|
+
corpus.add_document(text)
|
13
|
+
|
14
|
+
docid = text.docid(corpus)
|
15
|
+
|
16
|
+
assert_equal docid.document, text
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_find
|
20
|
+
text = "This is a document"
|
21
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
22
|
+
|
23
|
+
TmpFile.with_file do |path|
|
24
|
+
corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
|
25
|
+
corpus.extend Document::Corpus
|
26
|
+
|
27
|
+
corpus.add_document(text)
|
28
|
+
|
29
|
+
assert corpus.prefix("TEST:").include?(text.docid)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
@@ -8,13 +8,13 @@ require 'rbbt/sources/NCI'
|
|
8
8
|
|
9
9
|
class TestFinder < Test::Unit::TestCase
|
10
10
|
|
11
|
-
def
|
11
|
+
def _test_namespace_and_format
|
12
12
|
f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
|
13
13
|
assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
|
14
14
|
assert_equal "Ensembl Gene ID", f.instances.first.format
|
15
15
|
end
|
16
16
|
|
17
|
-
def
|
17
|
+
def _test_find
|
18
18
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["SF3B1"])
|
19
19
|
|
20
20
|
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
@@ -23,7 +23,7 @@ class TestFinder < Test::Unit::TestCase
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
26
|
+
def _test_find2
|
27
27
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
|
28
28
|
|
29
29
|
m = f.find("RAS").first
|
@@ -9,7 +9,17 @@ We found that TP53 is regulated by MDM2 in Homo sapiens
|
|
9
9
|
EOF
|
10
10
|
|
11
11
|
mentions = GNormPlus.process({:file => text})
|
12
|
-
|
12
|
+
assert_equal 1, mentions.length
|
13
|
+
assert_equal 2, mentions["file"].length
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_entities
|
17
|
+
text =<<-EOF
|
18
|
+
We found that TP53 is regulated by MDM2 in Homo sapiens
|
19
|
+
EOF
|
20
|
+
|
21
|
+
mentions = GNormPlus.entities({:file => text})
|
22
|
+
mentions["file"].include? "TP53"
|
13
23
|
end
|
14
24
|
end
|
15
25
|
|
@@ -2,17 +2,17 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
|
|
2
2
|
require 'rbbt/ner/patterns'
|
3
3
|
|
4
4
|
class TestPatternRelExt < Test::Unit::TestCase
|
5
|
-
def
|
5
|
+
def _test_simple_pattern
|
6
6
|
text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
|
7
7
|
|
8
8
|
gene1 = "TP53"
|
9
|
-
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
9
|
+
NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
|
10
10
|
|
11
11
|
gene2 = "CDK5"
|
12
|
-
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
12
|
+
NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
|
13
13
|
|
14
14
|
interaction = "interacts"
|
15
|
-
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
15
|
+
NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
|
16
16
|
|
17
17
|
Segmented.setup(text, [gene1, gene2, interaction])
|
18
18
|
|
@@ -23,13 +23,13 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
23
23
|
text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
|
24
24
|
|
25
25
|
gene1 = "TP53"
|
26
|
-
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
26
|
+
NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
|
27
27
|
|
28
28
|
gene2 = "CDK5"
|
29
|
-
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
29
|
+
NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
|
30
30
|
|
31
31
|
interaction = "interacts"
|
32
|
-
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
32
|
+
NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
|
33
33
|
|
34
34
|
Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
|
35
35
|
|
@@ -40,7 +40,7 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
40
40
|
PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
|
41
41
|
end
|
42
42
|
|
43
|
-
def
|
43
|
+
def _test_chunk_pattern
|
44
44
|
text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
|
45
45
|
|
46
46
|
drug = "thiazolidinediones"
|
@@ -57,7 +57,7 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
57
57
|
end
|
58
58
|
|
59
59
|
|
60
|
-
def
|
60
|
+
def _test_entities_with_spaces
|
61
61
|
PatternRelExt.new("NP[entity:Gene Name]").token_trie
|
62
62
|
end
|
63
63
|
|