rbbt-text 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/document.rb +46 -0
- data/lib/rbbt/document/annotation.rb +42 -0
- data/lib/rbbt/document/corpus.rb +38 -0
- data/lib/rbbt/document/corpus/pubmed.rb +33 -0
- data/lib/rbbt/ner/NER.rb +3 -3
- data/lib/rbbt/ner/abner.rb +1 -1
- data/lib/rbbt/ner/banner.rb +1 -1
- data/lib/rbbt/ner/brat.rb +1 -1
- data/lib/rbbt/ner/chemical_tagger.rb +1 -2
- data/lib/rbbt/ner/g_norm_plus.rb +19 -2
- data/lib/rbbt/ner/linnaeus.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
- data/lib/rbbt/ner/oscar3.rb +1 -2
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +6 -5
- data/lib/rbbt/ner/regexpNER.rb +1 -2
- data/lib/rbbt/ner/token_trieNER.rb +6 -6
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +1 -1
- data/lib/rbbt/nlp/nlp.rb +5 -5
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
- data/lib/rbbt/segment.rb +177 -0
- data/lib/rbbt/segment/annotation.rb +58 -0
- data/lib/rbbt/segment/encoding.rb +18 -0
- data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -11
- data/lib/rbbt/segment/overlaps.rb +63 -0
- data/lib/rbbt/segment/range_index.rb +35 -0
- data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
- data/lib/rbbt/segment/token.rb +23 -0
- data/lib/rbbt/{text/segment → segment}/transformed.rb +7 -9
- data/lib/rbbt/segment/tsv.rb +41 -0
- data/share/install/software/Linnaeus +1 -1
- data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
- data/test/rbbt/document/test_annotation.rb +140 -0
- data/test/rbbt/document/test_corpus.rb +33 -0
- data/test/rbbt/ner/test_finder.rb +3 -3
- data/test/rbbt/ner/test_g_norm_plus.rb +11 -1
- data/test/rbbt/ner/test_patterns.rb +9 -9
- data/test/rbbt/ner/test_regexpNER.rb +14 -14
- data/test/rbbt/ner/test_rnorm.rb +3 -4
- data/test/rbbt/ner/test_token_trieNER.rb +1 -0
- data/test/rbbt/nlp/genia/test_sentence_splitter.rb +13 -3
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
- data/test/rbbt/segment/test_annotation.rb +40 -0
- data/test/rbbt/segment/test_corpus.rb +36 -0
- data/test/rbbt/segment/test_encoding.rb +24 -0
- data/test/rbbt/{text/segment → segment}/test_named_entity.rb +12 -9
- data/test/rbbt/segment/test_overlaps.rb +69 -0
- data/test/rbbt/segment/test_range_index.rb +43 -0
- data/test/rbbt/{text/segment → segment}/test_transformed.rb +76 -51
- data/test/rbbt/test_document.rb +14 -0
- data/test/rbbt/test_segment.rb +187 -0
- data/test/test_helper.rb +5 -3
- metadata +40 -32
- data/lib/rbbt/text/corpus.rb +0 -106
- data/lib/rbbt/text/corpus/document.rb +0 -383
- data/lib/rbbt/text/corpus/document_repo.rb +0 -68
- data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
- data/lib/rbbt/text/document.rb +0 -39
- data/lib/rbbt/text/segment.rb +0 -363
- data/lib/rbbt/text/segment/docid.rb +0 -46
- data/lib/rbbt/text/segment/relationship.rb +0 -24
- data/lib/rbbt/text/segment/token.rb +0 -49
- data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
- data/test/rbbt/text/corpus/test_document.rb +0 -82
- data/test/rbbt/text/segment/test_relationship.rb +0 -0
- data/test/rbbt/text/segment/test_segmented.rb +0 -23
- data/test/rbbt/text/test_corpus.rb +0 -34
- data/test/rbbt/text/test_document.rb +0 -58
- data/test/rbbt/text/test_segment.rb +0 -100
@@ -0,0 +1,35 @@
|
|
1
|
+
# Mixin applied to the table built by Segment::RangeIndex.index. It remembers
# the corpus the indexed segments belong to and tags every lookup result with it.
module Segment::RangeIndex
  # Corpus handed to SegID.setup for every lookup result.
  attr_accessor :corpus

  # Delegates the lookup to the extended object's own `[]` and passes the
  # result through SegID.setup together with this index's corpus.
  def [](*args)
    SegID.setup(super(*args), :corpus => corpus)
  end

  # Builds an index from segment ids to their [begin, end] ranges.
  #
  # segments     - a list of segments, or a Hash whose values are (lists of)
  #                segments; Hash values are flattened into a single list.
  # corpus       - stored on the returned index (see #[]).
  # persist_file - :memory or nil disables persistence; any other value is
  #                passed to Persist.persist as the :file option.
  #
  # Returns the table, extended with Segment::RangeIndex.
  def self.index(segments, corpus, persist_file = :memory)
    segments = segments.values.flatten if Hash === segments
    in_memory = persist_file.nil? || persist_file == :memory

    annotation_index = Persist.persist("Segment_index", :fwt, :persist => !in_memory, :file => persist_file) do
      widest = 0
      entries = []

      segments.each do |segment|
        # Segments without an offset cannot be placed in the index.
        next if segment.offset.nil?

        span = segment.range
        # Track the longest segid; it is handed to FixWidthTable.get below
        # (presumably as the fixed value width -- confirm against FixWidthTable).
        widest = [segment.segid.length, widest].max
        entries << [segment.segid, [span.begin, span.end]]
      end

      # NOTE(review): the trailing `true` presumably selects range mode -- confirm.
      table = FixWidthTable.get :memory, widest, true
      table.add_range entries
      table
    end

    annotation_index.extend Segment::RangeIndex
    annotation_index.corpus = corpus
    annotation_index
  end
end
|
35
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rbbt/segment'
|
2
|
+
|
3
|
+
# A Segment-based entity for individual tokens of a text, carrying an
# additional :original annotation.
module Token
  extend Entity
  include Segment

  self.annotation :original

  # Splits +text+ into tokens, recording each token's offset.
  #
  # text     - the String to tokenize.
  # split_at - Regexp marking token boundaries. Text matched outside the first
  #            capture group (whitespace by default) is discarded; text matched
  #            by the first capture group (punctuation by default) becomes a
  #            token of its own.
  # start    - offset assigned to the first character of +text+.
  #
  # Returns an Array of strings set up with Token.setup and an :offset.
  def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
    tokens = []
    remaining = text
    cursor = start

    while (hit = remaining.match(split_at))
      # Everything before the separator is a token, unless it is empty.
      leading = hit.pre_match
      tokens << Token.setup(leading, :offset => cursor) unless leading.empty?

      # A captured separator is kept as its own token.
      groups = hit.captures
      if groups.any? && !groups.first.empty?
        tokens << Token.setup(groups.first, :offset => cursor + hit.begin(1))
      end

      # Continue after the separator, keeping offsets relative to the input.
      cursor += hit.end(0)
      remaining = hit.post_match
    end

    # Whatever follows the last separator is the final token.
    tokens << Token.setup(remaining, :offset => cursor) unless remaining.empty?
    tokens
  end
end
|
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'rbbt/util/misc'
|
2
|
-
require 'rbbt/text/segment'
|
3
|
-
|
4
1
|
module Transformed
|
5
2
|
|
6
3
|
def self.transform(text, segments, replacement = nil, &block)
|
@@ -111,10 +108,10 @@ module Transformed
|
|
111
108
|
|
112
109
|
self[updated_begin..updated_end] = new
|
113
110
|
|
114
|
-
@transformed_segments[segment.
|
111
|
+
@transformed_segments[segment.object_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
|
115
112
|
|
116
113
|
segment.replace original_text
|
117
|
-
stack << segment.
|
114
|
+
stack << segment.object_id
|
118
115
|
end
|
119
116
|
@transformation_stack << stack
|
120
117
|
end
|
@@ -122,13 +119,13 @@ module Transformed
|
|
122
119
|
def fix_segment(segment, range, diff)
|
123
120
|
case
|
124
121
|
# Before
|
125
|
-
when segment.
|
122
|
+
when segment.eend < range.begin
|
126
123
|
# After
|
127
124
|
when segment.offset.to_i > range.end + diff
|
128
125
|
segment.offset = segment.offset.to_i - diff
|
129
126
|
# Includes
|
130
|
-
when (segment.offset.to_i <= range.begin and segment.
|
131
|
-
segment.replace self[segment.offset.to_i..segment.
|
127
|
+
when (segment.offset.to_i <= range.begin and segment.eend >= range.end + diff)
|
128
|
+
segment.replace self[segment.offset.to_i..segment.eend - diff]
|
132
129
|
else
|
133
130
|
raise "Segment Overlaps"
|
134
131
|
end
|
@@ -141,7 +138,8 @@ module Transformed
|
|
141
138
|
|
142
139
|
if first_only
|
143
140
|
@transformation_stack.pop.reverse.each do |id|
|
144
|
-
|
141
|
+
segment_info = @transformed_segments.delete id
|
142
|
+
orig_range, diff, text, range = segment_info
|
145
143
|
|
146
144
|
new_range = (range.begin..range.last + diff)
|
147
145
|
self[new_range] = text
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#module Segment
|
2
|
+
#
|
3
|
+
# def self.set_tsv_fields(fields, segments)
|
4
|
+
# tsv_fields = []
|
5
|
+
# add_types = ! (fields.delete(:no_types) || fields.delete("no_types") || fields.include?(:JSON) || fields.include?("JSON"))
|
6
|
+
# literal = (fields.delete(:literal) || fields.delete("literal"))
|
7
|
+
# tsv_fields << "Start" << "End"
|
8
|
+
# tsv_fields << :annotation_types if add_types
|
9
|
+
# tsv_fields << :literal if literal
|
10
|
+
#
|
11
|
+
# if fields.any? and not (fields == [:all] or fields == ["all"])
|
12
|
+
# tsv_fields.concat fields
|
13
|
+
# else
|
14
|
+
# tsv_fields.concat segments.first.annotations if segments.any?
|
15
|
+
# end
|
16
|
+
# tsv_fields
|
17
|
+
# tsv_fields.collect!{|f| f.to_s}
|
18
|
+
# tsv_fields.delete "offset"
|
19
|
+
# tsv_fields
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# def self.tsv(segments, *fields)
|
23
|
+
# fields = set_tsv_fields fields, segments
|
24
|
+
# tsv = TSV.setup({}, :key_field => "ID", :fields => fields, :type => :double)
|
25
|
+
#
|
26
|
+
# segments.each do |segment|
|
27
|
+
# tsv[segment.segment_id] = self.tsv_values_for_segment(segment, fields)
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# tsv
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# def self.load_tsv(tsv)
|
34
|
+
# fields = tsv.fields
|
35
|
+
# tsv.with_unnamed do
|
36
|
+
# tsv.collect do |id, values|
|
37
|
+
# Annotated.load_tsv_values(id, values, fields)
|
38
|
+
# end
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
#end
|
@@ -12,7 +12,7 @@ pkg_dir="`opt_dir \"$name\"`"
|
|
12
12
|
build_dir=`build_dir`
|
13
13
|
mv "$build_dir" "$pkg_dir"
|
14
14
|
tmp_file="~/.rbbt/tmp/species-proxy-properties.tmp"
|
15
|
-
mkdir -p $(
|
15
|
+
mkdir -p $(dirname "$tmp_file")
|
16
16
|
cat "$pkg_dir/species-proxy/properties.conf" |grep -v "^.dir =" >> $tmp_file
|
17
17
|
echo "\$dir = $pkg_dir/species-proxy/" > "$pkg_dir/species-proxy/properties.conf"
|
18
18
|
cat $tmp_file | grep -v "^#" >> "$pkg_dir/species-proxy/properties.conf"
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
require 'rbbt/document/corpus/pubmed'
|
5
|
+
|
6
|
+
# Checks Document::Corpus#add_pmid against a PubMed article.
# NOTE(review): presumably needs network access to PubMed -- confirm.
class TestCorpusPubmed < Test::Unit::TestCase
  # Adds the abstract of PMID 32299157 to an empty corpus and checks that the
  # title derived from the resulting document mentions "COVID-19".
  def test_add_pmid
    pubmed_corpus = Document::Corpus.setup({})
    abstract_doc = pubmed_corpus.add_pmid("32299157", :abstract).first

    assert abstract_doc.to(:title).include?("COVID-19")
  end
end
|
15
|
+
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
require 'rbbt/segment'
|
5
|
+
require 'rbbt/document/annotation'
|
6
|
+
require 'rbbt/segment/named_entity'
|
7
|
+
|
8
|
+
# Exercises Document.define, Document.define_multiple and Document.persist.
class TestAnnotation < Test::Unit::TestCase
  # Raised by the :persisted_words annotation when its block runs while
  # $called_once is already set.
  class CalledOnce < Exception; end

  # Registers the annotations used by the tests. $called_once records whether
  # the :persisted_words / :ner blocks have been executed.
  def setup
    Document.define(:words) { self.split(" ") }

    $called_once = false
    Document.define(:persisted_words) do
      raise CalledOnce if $called_once
      $called_once = true
      self.split(" ")
    end

    Document.define_multiple(:multiple_words) do |list|
      list.collect { |doc| doc.words }
    end

    Document.define(:ner) do
      $called_once = true
      self.split(" ").collect { |e| NamedEntity.setup(e, :code => Misc.digest(e)) }
    end

    Document.persist :ner
  end

  # Words produced by a defined annotation carry ranges that point back into
  # the document text.
  def test_define
    text = indexed_document(sample_sentence)

    assert_equal text[text.words[1].range], text.words[1]
  end

  # A define_multiple annotation works on a list of documents as well as on a
  # single document.
  def test_define_multiple
    text1 = sample_sentence
    text2 = "This is another sentence"
    Document.setup(text1, "TEST", "test_doc1", nil)
    Document.setup(text2, "TEST", "test_doc2", nil)

    # Both documents go into the same corpus.
    corpus = {}
    corpus.extend Document::Corpus
    [text1, text2].each { |doc| corpus.add_document(doc) }

    assert_equal 2, Document.setup([text1, text2]).multiple_words.length
    assert_equal text1.split(" "), text1.multiple_words

    #Document.persist :multiple_words, :annotations, :annotation_repo => Rbbt.tmp.test.multiple_words
    #assert_equal 2, Document.setup([text1, text2]).multiple_words.length
    #assert_equal text1.split(" "), text1.multiple_words
  end

  # Persisting :persisted_words to a file.
  def test_persist
    text = indexed_document(sample_sentence)

    check_persisted_words(text) do
      Document.persist :persisted_words, :annotations, :file => Rbbt.tmp.test.persisted_words.find(:user)
    end
  end

  # Persisting :persisted_words to an annotation repo.
  def test_persist_annotation_repo
    text = indexed_document(sample_sentence)

    check_persisted_words(text) do
      Document.persist :persisted_words, :annotations, :annotation_repo => Rbbt.tmp.test.persisted_words_repo.find(:user)
    end
  end

  # :ner is persisted in #setup, so a second call must not run its block, and
  # the resulting entities carry segment ids containing "TEST:".
  def test_persist_ner
    text = indexed_document(sample_sentence)

    text.ner

    $called_once = false
    text.ner

    assert ! $called_once

    assert text.ner.first.segid.include?("TEST:")
  end

  private

  # Returns a fresh copy of the sentence used by most tests; a new String each
  # time, because Document.setup is applied to the object itself.
  def sample_sentence
    "This sentence mentions the TP53 gene and the CDK5R1 protein"
  end

  # Sets +text+ up as document "TEST"/"test_doc1", adds it to a new Hash-backed
  # corpus and returns it.
  def indexed_document(text)
    Document.setup(text, "TEST", "test_doc1", nil)

    corpus = {}
    corpus.extend Document::Corpus
    corpus.add_document(text)

    text
  end

  # Shared body of the persistence tests. The statement order matters because
  # $called_once changes on every unpersisted call. The block must call
  # Document.persist for :persisted_words.
  def check_persisted_words(text)
    # First call: the block runs and sets $called_once.
    assert_equal "persisted_words", text.persisted_words.first.type

    # Not persisted yet, so calling again runs the block and raises.
    assert_raise CalledOnce do
      assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
    end

    Log.severity = 0
    yield

    # After enabling persistence the first call still runs the block ...
    $called_once = false
    text.persisted_words
    assert $called_once

    # ... but later calls no longer raise.
    assert_nothing_raised do
      assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
    end
  end
end
|
140
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
|
5
|
+
# Exercises Document::Corpus with a Hash-backed and a TokyoCabinet-backed store.
class TestDocumentCorpus < Test::Unit::TestCase
  # A document added to a corpus can be fetched back through its docid.
  def test_corpus
    text = new_document
    corpus = Document::Corpus.setup({})
    corpus.add_document(text)

    docid = text.docid(corpus)

    assert_equal docid.document, text
  end

  # A document added to a BDB TokyoCabinet corpus is listed under the "TEST:"
  # key prefix.
  def test_find
    text = new_document

    TmpFile.with_file do |path|
      corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
      corpus.extend Document::Corpus
      corpus.add_document(text)

      assert corpus.prefix("TEST:").include?(text.docid)
    end
  end

  private

  # Returns a fresh string set up as document "TEST"/"test_doc1".
  def new_document
    text = "This is a document"
    Document.setup(text, "TEST", "test_doc1", nil)
    text
  end
end
|
33
|
+
|
@@ -8,13 +8,13 @@ require 'rbbt/sources/NCI'
|
|
8
8
|
|
9
9
|
class TestFinder < Test::Unit::TestCase
|
10
10
|
|
11
|
-
def
|
11
|
+
# Builds a Finder from the first 1000 lines of the human identifiers file and
# checks the namespace and format of its first instance.
# NOTE(review): the leading underscore presumably keeps the test runner from
# collecting this method -- confirm it is disabled on purpose.
def _test_namespace_and_format
  identifiers = Organism.identifiers(Organism.default_code("Hsa")).produce.find
  head = CMD.cmd("head -n 1000", :in => Open.open(identifiers))
  finder = Finder.new(head)

  assert_equal Organism.default_code("Hsa"), finder.instances.first.namespace
  assert_equal "Ensembl Gene ID", finder.instances.first.format
end
|
16
16
|
|
17
|
-
def
|
17
|
+
def _test_find
|
18
18
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["SF3B1"])
|
19
19
|
|
20
20
|
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
@@ -23,7 +23,7 @@ class TestFinder < Test::Unit::TestCase
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
26
|
+
def _test_find2
|
27
27
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
|
28
28
|
|
29
29
|
m = f.find("RAS").first
|
@@ -9,7 +9,17 @@ We found that TP53 is regulated by MDM2 in Homo sapiens
|
|
9
9
|
EOF
|
10
10
|
|
11
11
|
mentions = GNormPlus.process({:file => text})
|
12
|
-
|
12
|
+
assert_equal 1, mentions.length
|
13
|
+
assert_equal 2, mentions["file"].length
|
14
|
+
end
|
15
|
+
|
16
|
+
# GNormPlus.entities must report the TP53 mention for the document passed
# under the :file key (results are looked up under the "file" string key, as
# in the GNormPlus.process test in this file).
def test_entities
  text =<<-EOF
We found that TP53 is regulated by MDM2 in Homo sapiens
  EOF

  mentions = GNormPlus.entities({:file => text})

  # The original evaluated this expression without asserting it, so the test
  # could never fail on a wrong result.
  assert mentions["file"].include?("TP53")
end
|
14
24
|
end
|
15
25
|
|
@@ -2,17 +2,17 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
|
|
2
2
|
require 'rbbt/ner/patterns'
|
3
3
|
|
4
4
|
class TestPatternRelExt < Test::Unit::TestCase
|
5
|
-
def
|
5
|
+
def _test_simple_pattern
|
6
6
|
text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
|
7
7
|
|
8
8
|
gene1 = "TP53"
|
9
|
-
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
9
|
+
NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
|
10
10
|
|
11
11
|
gene2 = "CDK5"
|
12
|
-
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
12
|
+
NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
|
13
13
|
|
14
14
|
interaction = "interacts"
|
15
|
-
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
15
|
+
NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
|
16
16
|
|
17
17
|
Segmented.setup(text, [gene1, gene2, interaction])
|
18
18
|
|
@@ -23,13 +23,13 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
23
23
|
text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
|
24
24
|
|
25
25
|
gene1 = "TP53"
|
26
|
-
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
26
|
+
NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
|
27
27
|
|
28
28
|
gene2 = "CDK5"
|
29
|
-
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
29
|
+
NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
|
30
30
|
|
31
31
|
interaction = "interacts"
|
32
|
-
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
32
|
+
NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
|
33
33
|
|
34
34
|
Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
|
35
35
|
|
@@ -40,7 +40,7 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
40
40
|
PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
|
41
41
|
end
|
42
42
|
|
43
|
-
def
|
43
|
+
def _test_chunk_pattern
|
44
44
|
text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
|
45
45
|
|
46
46
|
drug = "thiazolidinediones"
|
@@ -57,7 +57,7 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
57
57
|
end
|
58
58
|
|
59
59
|
|
60
|
-
def
|
60
|
+
# Builds the token trie for a pattern whose entity type contains a space.
# There are no assertions; the method only checks that building it does not raise.
# NOTE(review): the leading underscore presumably keeps the test runner from
# collecting this method -- confirm it is disabled on purpose.
def _test_entities_with_spaces
  extractor = PatternRelExt.new("NP[entity:Gene Name]")
  extractor.token_trie
end
|
63
63
|
|