rbbt-text 1.1.9 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/bow/bow.rb +5 -2
- data/lib/rbbt/bow/dictionary.rb +27 -23
- data/lib/rbbt/document.rb +56 -0
- data/lib/rbbt/document/annotation.rb +45 -0
- data/lib/rbbt/document/corpus.rb +61 -0
- data/lib/rbbt/document/corpus/pubmed.rb +33 -0
- data/lib/rbbt/ner/NER.rb +3 -3
- data/lib/rbbt/ner/abner.rb +1 -1
- data/lib/rbbt/ner/banner.rb +1 -1
- data/lib/rbbt/ner/brat.rb +1 -1
- data/lib/rbbt/ner/chemical_tagger.rb +1 -2
- data/lib/rbbt/ner/g_norm_plus.rb +42 -12
- data/lib/rbbt/ner/linnaeus.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
- data/lib/rbbt/ner/oscar3.rb +1 -2
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +5 -5
- data/lib/rbbt/ner/regexpNER.rb +1 -2
- data/lib/rbbt/ner/token_trieNER.rb +35 -22
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
- data/lib/rbbt/nlp/nlp.rb +5 -5
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
- data/lib/rbbt/nlp/spaCy.rb +52 -0
- data/lib/rbbt/segment.rb +179 -0
- data/lib/rbbt/segment/annotation.rb +58 -0
- data/lib/rbbt/segment/encoding.rb +18 -0
- data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -10
- data/lib/rbbt/segment/overlaps.rb +63 -0
- data/lib/rbbt/segment/range_index.rb +35 -0
- data/lib/rbbt/segment/relationship.rb +7 -0
- data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
- data/lib/rbbt/segment/token.rb +23 -0
- data/lib/rbbt/{text/segment → segment}/transformed.rb +10 -8
- data/lib/rbbt/segment/tsv.rb +41 -0
- data/share/install/software/Linnaeus +1 -1
- data/share/install/software/OpenNLP +1 -1
- data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
- data/test/rbbt/document/test_annotation.rb +140 -0
- data/test/rbbt/document/test_corpus.rb +33 -0
- data/test/rbbt/ner/test_finder.rb +3 -3
- data/test/rbbt/ner/test_g_norm_plus.rb +20 -3
- data/test/rbbt/ner/test_patterns.rb +9 -9
- data/test/rbbt/ner/test_regexpNER.rb +14 -14
- data/test/rbbt/ner/test_rnorm.rb +3 -4
- data/test/rbbt/ner/test_token_trieNER.rb +1 -0
- data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
- data/test/rbbt/segment/test_annotation.rb +39 -0
- data/test/rbbt/segment/test_corpus.rb +36 -0
- data/test/rbbt/segment/test_encoding.rb +24 -0
- data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
- data/test/rbbt/segment/test_overlaps.rb +69 -0
- data/test/rbbt/segment/test_range_index.rb +42 -0
- data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
- data/test/rbbt/test_document.rb +14 -0
- data/test/rbbt/test_segment.rb +182 -0
- data/test/test_helper.rb +5 -3
- data/test/test_spaCy.rb +32 -0
- metadata +44 -32
- data/lib/rbbt/text/corpus.rb +0 -106
- data/lib/rbbt/text/corpus/document.rb +0 -361
- data/lib/rbbt/text/corpus/document_repo.rb +0 -68
- data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
- data/lib/rbbt/text/document.rb +0 -39
- data/lib/rbbt/text/segment.rb +0 -355
- data/lib/rbbt/text/segment/docid.rb +0 -46
- data/lib/rbbt/text/segment/relationship.rb +0 -24
- data/lib/rbbt/text/segment/token.rb +0 -49
- data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
- data/test/rbbt/text/corpus/test_document.rb +0 -52
- data/test/rbbt/text/segment/test_relationship.rb +0 -0
- data/test/rbbt/text/segment/test_segmented.rb +0 -23
- data/test/rbbt/text/test_corpus.rb +0 -34
- data/test/rbbt/text/test_document.rb +0 -58
- data/test/rbbt/text/test_segment.rb +0 -100
@@ -1,6 +1,3 @@
|
|
1
|
-
require 'rbbt/util/misc'
|
2
|
-
require 'rbbt/text/segment'
|
3
|
-
|
4
1
|
module Transformed
|
5
2
|
|
6
3
|
def self.transform(text, segments, replacement = nil, &block)
|
@@ -71,6 +68,10 @@ module Transformed
|
|
71
68
|
|
72
69
|
segments = [segments] unless Array === segments
|
73
70
|
orig_length = self.length
|
71
|
+
|
72
|
+
offset = self.respond_to?(:offset) ? self.offset.to_i : 0
|
73
|
+
segments = segments.select{|s| s.offset.to_i >= offset && s.offset.to_i <= offset + self.length - 1 }
|
74
|
+
|
74
75
|
Segment.clean_sort(segments).each do |segment|
|
75
76
|
next if segment.offset.nil?
|
76
77
|
|
@@ -89,7 +90,7 @@ module Transformed
|
|
89
90
|
|
90
91
|
updated_text = self[updated_begin..updated_end]
|
91
92
|
if updated_text.nil?
|
92
|
-
Log.warn "Range outside of segment: #{self.length} #{segment.
|
93
|
+
Log.warn "Range outside of segment: #{self.length} #{segment.range} (#{updated_range})"
|
93
94
|
next
|
94
95
|
end
|
95
96
|
|
@@ -122,13 +123,13 @@ module Transformed
|
|
122
123
|
def fix_segment(segment, range, diff)
|
123
124
|
case
|
124
125
|
# Before
|
125
|
-
when segment.
|
126
|
+
when segment.eend < range.begin
|
126
127
|
# After
|
127
128
|
when segment.offset.to_i > range.end + diff
|
128
129
|
segment.offset = segment.offset.to_i - diff
|
129
130
|
# Includes
|
130
|
-
when (segment.offset.to_i <= range.begin and segment.
|
131
|
-
segment.replace self[segment.offset.to_i..segment.
|
131
|
+
when (segment.offset.to_i <= range.begin and segment.eend >= range.end + diff)
|
132
|
+
segment.replace self[segment.offset.to_i..segment.eend - diff]
|
132
133
|
else
|
133
134
|
raise "Segment Overlaps"
|
134
135
|
end
|
@@ -141,7 +142,8 @@ module Transformed
|
|
141
142
|
|
142
143
|
if first_only
|
143
144
|
@transformation_stack.pop.reverse.each do |id|
|
144
|
-
|
145
|
+
segment_info = @transformed_segments.delete id
|
146
|
+
orig_range, diff, text, range = segment_info
|
145
147
|
|
146
148
|
new_range = (range.begin..range.last + diff)
|
147
149
|
self[new_range] = text
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#module Segment
|
2
|
+
#
|
3
|
+
# def self.set_tsv_fields(fields, segments)
|
4
|
+
# tsv_fields = []
|
5
|
+
# add_types = ! (fields.delete(:no_types) || fields.delete("no_types") || fields.include?(:JSON) || fields.include?("JSON"))
|
6
|
+
# literal = (fields.delete(:literal) || fields.delete("literal"))
|
7
|
+
# tsv_fields << "Start" << "End"
|
8
|
+
# tsv_fields << :annotation_types if add_types
|
9
|
+
# tsv_fields << :literal if literal
|
10
|
+
#
|
11
|
+
# if fields.any? and not (fields == [:all] or fields == ["all"])
|
12
|
+
# tsv_fields.concat fields
|
13
|
+
# else
|
14
|
+
# tsv_fields.concat segments.first.annotations if segments.any?
|
15
|
+
# end
|
16
|
+
# tsv_fields
|
17
|
+
# tsv_fields.collect!{|f| f.to_s}
|
18
|
+
# tsv_fields.delete "offset"
|
19
|
+
# tsv_fields
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# def self.tsv(segments, *fields)
|
23
|
+
# fields = set_tsv_fields fields, segments
|
24
|
+
# tsv = TSV.setup({}, :key_field => "ID", :fields => fields, :type => :double)
|
25
|
+
#
|
26
|
+
# segments.each do |segment|
|
27
|
+
# tsv[segment.segment_id] = self.tsv_values_for_segment(segment, fields)
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# tsv
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# def self.load_tsv(tsv)
|
34
|
+
# fields = tsv.fields
|
35
|
+
# tsv.with_unnamed do
|
36
|
+
# tsv.collect do |id, values|
|
37
|
+
# Annotated.load_tsv_values(id, values, fields)
|
38
|
+
# end
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
#end
|
@@ -12,7 +12,7 @@ pkg_dir="`opt_dir \"$name\"`"
|
|
12
12
|
build_dir=`build_dir`
|
13
13
|
mv "$build_dir" "$pkg_dir"
|
14
14
|
tmp_file="~/.rbbt/tmp/species-proxy-properties.tmp"
|
15
|
-
mkdir -p $(
|
15
|
+
mkdir -p $(dirname "$tmp_file")
|
16
16
|
cat "$pkg_dir/species-proxy/properties.conf" |grep -v "^.dir =" >> $tmp_file
|
17
17
|
echo "\$dir = $pkg_dir/species-proxy/" > "$pkg_dir/species-proxy/properties.conf"
|
18
18
|
cat $tmp_file | grep -v "^#" >> "$pkg_dir/species-proxy/properties.conf"
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
require 'rbbt/document/corpus/pubmed'
|
5
|
+
|
6
|
+
class TestCorpusPubmed < Test::Unit::TestCase
|
7
|
+
def test_add_pmid
|
8
|
+
corpus = Document::Corpus.setup({})
|
9
|
+
|
10
|
+
document = corpus.add_pmid("32299157", :abstract).first
|
11
|
+
title = document.to(:title)
|
12
|
+
assert title.include?("COVID-19")
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
@@ -0,0 +1,140 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
require 'rbbt/segment'
|
5
|
+
require 'rbbt/document/annotation'
|
6
|
+
require 'rbbt/segment/named_entity'
|
7
|
+
|
8
|
+
class TestAnnotation < Test::Unit::TestCase
|
9
|
+
class CalledOnce < Exception; end
|
10
|
+
def setup
|
11
|
+
Document.define :words do
|
12
|
+
self.split(" ")
|
13
|
+
end
|
14
|
+
|
15
|
+
$called_once = false
|
16
|
+
Document.define :persisted_words do
|
17
|
+
raise CalledOnce if $called_once
|
18
|
+
$called_once = true
|
19
|
+
self.split(" ")
|
20
|
+
end
|
21
|
+
|
22
|
+
Document.define_multiple :multiple_words do |list|
|
23
|
+
list.collect{|doc| doc.words}
|
24
|
+
end
|
25
|
+
|
26
|
+
Document.define :ner do
|
27
|
+
$called_once = true
|
28
|
+
self.split(" ").collect{|e| NamedEntity.setup(e, :code => Misc.digest(e)) }
|
29
|
+
end
|
30
|
+
|
31
|
+
Document.persist :ner
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_define
|
35
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
36
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
37
|
+
|
38
|
+
corpus = {}
|
39
|
+
Document::Corpus.setup corpus
|
40
|
+
|
41
|
+
corpus.add_document(text)
|
42
|
+
|
43
|
+
assert_equal text[text.words[1].range], text.words[1]
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_define_multiple
|
47
|
+
text1 = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
48
|
+
text2 = "This is another sentence"
|
49
|
+
Document.setup(text1, "TEST", "test_doc1", nil)
|
50
|
+
Document.setup(text2, "TEST", "test_doc2", nil)
|
51
|
+
|
52
|
+
corpus = {}
|
53
|
+
Document::Corpus.setup corpus
|
54
|
+
|
55
|
+
corpus.add_document(text1)
|
56
|
+
corpus.add_document(text2)
|
57
|
+
|
58
|
+
assert_equal 2, Document.setup([text1, text2]).multiple_words.length
|
59
|
+
assert_equal text1.split(" "), text1.multiple_words
|
60
|
+
|
61
|
+
#Document.persist :multiple_words, :annotations, :annotation_repo => Rbbt.tmp.test.multiple_words
|
62
|
+
#assert_equal 2, Document.setup([text1, text2]).multiple_words.length
|
63
|
+
#assert_equal text1.split(" "), text1.multiple_words
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_persist
|
67
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
68
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
69
|
+
|
70
|
+
corpus = {}
|
71
|
+
Document::Corpus.setup corpus
|
72
|
+
|
73
|
+
corpus.add_document(text)
|
74
|
+
|
75
|
+
assert_equal "persisted_words", text.persisted_words.first.type
|
76
|
+
|
77
|
+
assert_raise CalledOnce do
|
78
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
79
|
+
end
|
80
|
+
|
81
|
+
Log.severity = 0
|
82
|
+
Document.persist :persisted_words, :annotations, :file => Rbbt.tmp.test.persisted_words.find(:user)
|
83
|
+
|
84
|
+
$called_once = false
|
85
|
+
text.persisted_words
|
86
|
+
assert $called_once
|
87
|
+
|
88
|
+
assert_nothing_raised do
|
89
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_persist_annotation_repo
|
94
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
95
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
96
|
+
|
97
|
+
corpus = {}
|
98
|
+
Document::Corpus.setup corpus
|
99
|
+
|
100
|
+
corpus.add_document(text)
|
101
|
+
|
102
|
+
assert_equal "persisted_words", text.persisted_words.first.type
|
103
|
+
|
104
|
+
assert_raise CalledOnce do
|
105
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
106
|
+
end
|
107
|
+
|
108
|
+
Log.severity = 0
|
109
|
+
Document.persist :persisted_words, :annotations, :annotation_repo => Rbbt.tmp.test.persisted_words_repo.find(:user)
|
110
|
+
|
111
|
+
$called_once = false
|
112
|
+
text.persisted_words
|
113
|
+
assert $called_once
|
114
|
+
|
115
|
+
assert_nothing_raised do
|
116
|
+
assert_equal text[text.persisted_words[1].range], text.persisted_words[1]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def test_persist_ner
|
121
|
+
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
122
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
123
|
+
|
124
|
+
corpus = {}
|
125
|
+
Document::Corpus.setup corpus
|
126
|
+
|
127
|
+
corpus.add_document(text)
|
128
|
+
|
129
|
+
|
130
|
+
text.ner
|
131
|
+
|
132
|
+
$called_once = false
|
133
|
+
text.ner
|
134
|
+
|
135
|
+
assert ! $called_once
|
136
|
+
|
137
|
+
assert text.ner.first.segid.include?("TEST:")
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/document'
|
3
|
+
require 'rbbt/document/corpus'
|
4
|
+
|
5
|
+
class TestDocumentCorpus < Test::Unit::TestCase
|
6
|
+
def test_corpus
|
7
|
+
text = "This is a document"
|
8
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
9
|
+
|
10
|
+
corpus = Document::Corpus.setup({})
|
11
|
+
|
12
|
+
corpus.add_document(text)
|
13
|
+
|
14
|
+
docid = text.docid(corpus)
|
15
|
+
|
16
|
+
assert_equal docid.document, text
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_find
|
20
|
+
text = "This is a document"
|
21
|
+
Document.setup(text, "TEST", "test_doc1", nil)
|
22
|
+
|
23
|
+
TmpFile.with_file do |path|
|
24
|
+
corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
|
25
|
+
corpus.extend Document::Corpus
|
26
|
+
|
27
|
+
corpus.add_document(text)
|
28
|
+
|
29
|
+
assert corpus.docids("TEST:").include?(text.docid)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
@@ -8,13 +8,13 @@ require 'rbbt/sources/NCI'
|
|
8
8
|
|
9
9
|
class TestFinder < Test::Unit::TestCase
|
10
10
|
|
11
|
-
def
|
11
|
+
def _test_namespace_and_format
|
12
12
|
f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
|
13
13
|
assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
|
14
14
|
assert_equal "Ensembl Gene ID", f.instances.first.format
|
15
15
|
end
|
16
16
|
|
17
|
-
def
|
17
|
+
def _test_find
|
18
18
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["SF3B1"])
|
19
19
|
|
20
20
|
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
@@ -23,7 +23,7 @@ class TestFinder < Test::Unit::TestCase
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
26
|
+
def _test_find2
|
27
27
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
|
28
28
|
|
29
29
|
m = f.find("RAS").first
|
@@ -5,12 +5,29 @@ Log.severity = 0
|
|
5
5
|
class TestGNormPlus < Test::Unit::TestCase
|
6
6
|
def test_match
|
7
7
|
text =<<-EOF
|
8
|
-
We found that TP53 is regulated by MDM2 in Homo sapiens
|
9
|
-
EOF
|
10
8
|
|
9
|
+
Introduction
|
10
|
+
|
11
|
+
We found that TP53 is regulated by MDM2 in Homo
|
12
|
+
sapiens
|
13
|
+
EOF
|
11
14
|
|
12
15
|
mentions = GNormPlus.process({:file => text})
|
13
|
-
|
16
|
+
|
17
|
+
assert_equal 1, mentions.length
|
18
|
+
assert_equal 3, mentions["file"].length
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_entities
|
22
|
+
text =<<-EOF
|
23
|
+
We found that TP53 is regulated by MDM2 in Homo sapiens
|
24
|
+
EOF
|
25
|
+
|
26
|
+
mentions = GNormPlus.entities({:file => text})
|
27
|
+
assert mentions["file"].include?("TP53")
|
28
|
+
mentions["file"].each do |mention|
|
29
|
+
assert_equal mention, text[mention.range].sub("\n", ' ')
|
30
|
+
end
|
14
31
|
end
|
15
32
|
end
|
16
33
|
|
@@ -2,17 +2,17 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
|
|
2
2
|
require 'rbbt/ner/patterns'
|
3
3
|
|
4
4
|
class TestPatternRelExt < Test::Unit::TestCase
|
5
|
-
def
|
5
|
+
def _test_simple_pattern
|
6
6
|
text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
|
7
7
|
|
8
8
|
gene1 = "TP53"
|
9
|
-
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
9
|
+
NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
|
10
10
|
|
11
11
|
gene2 = "CDK5"
|
12
|
-
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
12
|
+
NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
|
13
13
|
|
14
14
|
interaction = "interacts"
|
15
|
-
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
15
|
+
NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
|
16
16
|
|
17
17
|
Segmented.setup(text, [gene1, gene2, interaction])
|
18
18
|
|
@@ -23,13 +23,13 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
23
23
|
text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
|
24
24
|
|
25
25
|
gene1 = "TP53"
|
26
|
-
NamedEntity.setup(gene1, text.index(gene1), "Gene")
|
26
|
+
NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
|
27
27
|
|
28
28
|
gene2 = "CDK5"
|
29
|
-
NamedEntity.setup(gene2, text.index(gene2), "Gene")
|
29
|
+
NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
|
30
30
|
|
31
31
|
interaction = "interacts"
|
32
|
-
NamedEntity.setup(interaction, text.index(interaction), "Interaction")
|
32
|
+
NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
|
33
33
|
|
34
34
|
Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
|
35
35
|
|
@@ -40,7 +40,7 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
40
40
|
PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
|
41
41
|
end
|
42
42
|
|
43
|
-
def
|
43
|
+
def _test_chunk_pattern
|
44
44
|
text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
|
45
45
|
|
46
46
|
drug = "thiazolidinediones"
|
@@ -57,7 +57,7 @@ class TestPatternRelExt < Test::Unit::TestCase
|
|
57
57
|
end
|
58
58
|
|
59
59
|
|
60
|
-
def
|
60
|
+
def _test_entities_with_spaces
|
61
61
|
PatternRelExt.new("NP[entity:Gene Name]").token_trie
|
62
62
|
end
|
63
63
|
|
@@ -23,9 +23,9 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
23
23
|
matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)
|
24
24
|
|
25
25
|
assert_equal ["this", "this", "that"].sort, matches.sort
|
26
|
-
assert_equal "In ".length, matches.select{|m| m.
|
27
|
-
assert_equal "In this sentence I should find ".length, matches.select{|m| m.
|
28
|
-
assert_equal :this, matches.select{|m| m.
|
26
|
+
assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
|
27
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
|
28
|
+
assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
|
29
29
|
end
|
30
30
|
|
31
31
|
def test_define_regexps
|
@@ -39,9 +39,9 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
39
39
|
|
40
40
|
matches = ner.entities(sentence)
|
41
41
|
assert_equal ["this", "this", "that"].sort, matches.sort
|
42
|
-
assert_equal "In ".length, matches.select{|m| m.
|
43
|
-
assert_equal "In this sentence I should find ".length, matches.select{|m| m.
|
44
|
-
assert_equal :this, matches.select{|m| m.
|
42
|
+
assert_equal "In ".length, matches.select{|m| m.entity_type == :this }[0].offset
|
43
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this }[1].offset
|
44
|
+
assert_equal :this, matches.select{|m| m.entity_type == :this }[0].entity_type
|
45
45
|
end
|
46
46
|
|
47
47
|
|
@@ -51,9 +51,9 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
51
51
|
ner = RegExpNER.new({:this => /this/, :that => /that/})
|
52
52
|
matches = ner.entities(sentence)
|
53
53
|
assert_equal ["this", "this", "that"].sort, matches.sort
|
54
|
-
assert_equal "In ".length, matches.select{|m| m.
|
55
|
-
assert_equal "In this sentence I should find ".length, matches.select{|m| m.
|
56
|
-
assert_equal :this, matches.select{|m| m.
|
54
|
+
assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
|
55
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
|
56
|
+
assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
|
57
57
|
|
58
58
|
Segmented.setup(sentence)
|
59
59
|
ner_this = RegExpNER.new({:this => /this/})
|
@@ -64,9 +64,9 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
64
64
|
matches = sentence.segments
|
65
65
|
|
66
66
|
assert_equal ["this", "this", "that"].sort, matches.sort
|
67
|
-
assert_equal "In ".length, matches.select{|m| m.
|
68
|
-
assert_equal "In this sentence I should find ".length, matches.select{|m| m.
|
69
|
-
assert_equal :this, matches.select{|m| m.
|
67
|
+
assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
|
68
|
+
assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
|
69
|
+
assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
|
70
70
|
end
|
71
71
|
|
72
72
|
def test_entities_captures
|
@@ -75,8 +75,8 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
75
75
|
ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
|
76
76
|
matches = ner.entities(sentence)
|
77
77
|
assert_equal ["this", "this", "that", "should"].sort, matches.sort
|
78
|
-
assert_equal "In this sentence I ".length, matches.select{|m| m.
|
79
|
-
assert_equal :should, matches.select{|m| m.
|
78
|
+
assert_equal "In this sentence I ".length, matches.select{|m| m.entity_type == :should}[0].offset
|
79
|
+
assert_equal :should, matches.select{|m| m.entity_type == :should}[0].entity_type
|
80
80
|
end
|
81
81
|
|
82
82
|
|