rbbt-text 1.3.4 → 1.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +20 -0
- data/lib/rbbt/document/annotation.rb +2 -2
- data/lib/rbbt/document/corpus/pubmed.rb +14 -5
- data/lib/rbbt/document/corpus.rb +10 -7
- data/lib/rbbt/document.rb +7 -3
- data/lib/rbbt/ner/abner.rb +3 -2
- data/lib/rbbt/ner/banner.rb +3 -1
- data/lib/rbbt/ner/brat.rb +1 -1
- data/lib/rbbt/ner/linnaeus.rb +2 -1
- data/lib/rbbt/ner/oscar3.rb +0 -1
- data/lib/rbbt/ner/oscar4.rb +0 -1
- data/lib/rbbt/ner/rner.rb +229 -0
- data/lib/rbbt/ner/rnorm/tokens.rb +3 -1
- data/lib/rbbt/ner/rnorm.rb +5 -1
- data/lib/rbbt/ner/token_trieNER.rb +2 -1
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +1 -1
- data/lib/rbbt/nlp/spaCy.rb +158 -15
- data/lib/rbbt/relationship.rb +24 -0
- data/lib/rbbt/segment/named_entity.rb +4 -0
- data/lib/rbbt/segment/range_index.rb +1 -1
- data/lib/rbbt/segment/transformed.rb +9 -1
- data/lib/rbbt/segment.rb +3 -0
- data/share/install/software/OpenNLP +3 -8
- data/share/rner/config.rb +51 -0
- data/test/rbbt/document/corpus/test_pubmed.rb +1 -1
- data/test/rbbt/document/test_annotation.rb +10 -1
- data/test/rbbt/document/test_corpus.rb +14 -0
- data/test/rbbt/ner/rnorm/test_tokens.rb +11 -0
- data/test/rbbt/ner/test_rner.rb +132 -0
- data/test/rbbt/ner/test_rnorm.rb +5 -0
- data/test/rbbt/segment/test_named_entity.rb +2 -1
- data/test/rbbt/segment/test_transformed.rb +13 -30
- data/test/test_spaCy.rb +113 -1
- metadata +13 -18
data/test/test_spaCy.rb
CHANGED
@@ -3,7 +3,7 @@ require 'rbbt/nlp/spaCy'
|
|
3
3
|
require 'rbbt/document/corpus'
|
4
4
|
|
5
5
|
class TestSpaCy < Test::Unit::TestCase
|
6
|
-
def
|
6
|
+
def test_tokens
|
7
7
|
text = "I tell a story"
|
8
8
|
|
9
9
|
tokens = SpaCy.tokens(text)
|
@@ -12,6 +12,16 @@ class TestSpaCy < Test::Unit::TestCase
|
|
12
12
|
assert_equal "tell", tokens[1].to_s
|
13
13
|
end
|
14
14
|
|
15
|
+
def test_chunks
|
16
|
+
text = "Miguel Vazquez tell a good story"
|
17
|
+
|
18
|
+
tokens = SpaCy.chunks(text)
|
19
|
+
|
20
|
+
assert_equal 2, tokens.length
|
21
|
+
assert_equal "Miguel Vazquez", tokens[0].to_s
|
22
|
+
end
|
23
|
+
|
24
|
+
|
15
25
|
def test_segments
|
16
26
|
text = "I tell a story. It's a very good story."
|
17
27
|
|
@@ -28,5 +38,107 @@ class TestSpaCy < Test::Unit::TestCase
|
|
28
38
|
assert_equal segment, segment.segid.tap{|e| e.corpus = corpus}.segment
|
29
39
|
end
|
30
40
|
end
|
41
|
+
|
42
|
+
def test_chunk_segments
|
43
|
+
text = "I tell a story. It's a very good story."
|
44
|
+
|
45
|
+
corpus = Document::Corpus.setup({})
|
46
|
+
|
47
|
+
Document.setup(text, "TEST", "test_doc1", "simple_sentence")
|
48
|
+
|
49
|
+
corpus.add_document text
|
50
|
+
text.corpus = corpus
|
51
|
+
|
52
|
+
segments = SpaCy.chunk_segments(text)
|
53
|
+
|
54
|
+
segments.each do |segment|
|
55
|
+
assert_equal segment, segment.segid.tap{|e| e.corpus = corpus}.segment
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_dep_graph
|
60
|
+
text = "Meanwhile, TF antisense treatment activated the human ASBT promoter 5-fold and not only abrogated interleukin-1beta-mediated repression but led to a paradoxical increase in TG promoter activity"
|
61
|
+
graph = SpaCy.dep_graph(text, true)
|
62
|
+
|
63
|
+
tokens = SpaCy.segments(text)
|
64
|
+
index = Segment.index tokens
|
65
|
+
tf_s = tokens.select{|t| t == "TF" }.first
|
66
|
+
tg_s = tokens.select{|t| t == "TG" }.first
|
67
|
+
|
68
|
+
require 'rbbt/network/paths'
|
69
|
+
|
70
|
+
path = Paths.dijkstra(graph, tf_s.segid, [tg_s.segid])
|
71
|
+
path_tokens = path.collect do |segid|
|
72
|
+
range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
|
73
|
+
text[range]
|
74
|
+
end
|
75
|
+
|
76
|
+
assert path_tokens.include? 'increase'
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_chunk_dep_graph
|
81
|
+
text = "Meanwhile, TF antisense treatment activated the human ASBT promoter 5-fold and not only abrogated interleukin-1beta-mediated repression but led to a paradoxical increase in TG promoter activity"
|
82
|
+
graph = SpaCy.chunk_dep_graph(text, true)
|
83
|
+
|
84
|
+
tokens = SpaCy.chunk_segments(text)
|
85
|
+
index = Segment.index tokens
|
86
|
+
tf_s = tokens.select{|t| t.include? "TF" }.first
|
87
|
+
tg_s = tokens.select{|t| t.include? "TG" }.first
|
88
|
+
|
89
|
+
|
90
|
+
require 'rbbt/network/paths'
|
91
|
+
|
92
|
+
path = Paths.dijkstra(graph, tf_s.segid, [tg_s.segid])
|
93
|
+
path_tokens = path.collect do |segid|
|
94
|
+
range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
|
95
|
+
text[range]
|
96
|
+
end
|
97
|
+
|
98
|
+
assert path_tokens.include? 'increase'
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_paths
|
102
|
+
text = "Meanwhile, TF antisense treatment activated the human ASBT promoter 5-fold and not only abrogated interleukin-1beta-mediated repression but led to a paradoxical increase in TG promoter activity"
|
103
|
+
path = SpaCy.paths(text, Segment.setup("TF", :offset => text.index("TF")), Segment.setup("TG",:offset => text.index("TG")))
|
104
|
+
|
105
|
+
|
106
|
+
path_tokens = path.collect do |segid|
|
107
|
+
range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
|
108
|
+
text[range]
|
109
|
+
end
|
110
|
+
|
111
|
+
ppp text
|
112
|
+
iii path_tokens
|
113
|
+
|
114
|
+
assert path_tokens.include? 'increase'
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_paths2
|
118
|
+
text = "Deletion and domain swap experiments identified small, discreet positive and negative elements in A-Myb and TF that were required for the regulation of specific genes, such as DHRS2, TG, and mim-1"
|
119
|
+
path = SpaCy.paths(text, Segment.setup("TF", :offset => text.index("TF")), Segment.setup("TG",:offset => text.index("TG")))
|
120
|
+
|
121
|
+
|
122
|
+
path_tokens = path.collect do |segid|
|
123
|
+
range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
|
124
|
+
text[range]
|
125
|
+
end
|
126
|
+
|
127
|
+
iii path_tokens
|
128
|
+
|
129
|
+
|
130
|
+
assert path_tokens.include? 'regulation'
|
131
|
+
end
|
132
|
+
|
133
|
+
def test_paths3
|
134
|
+
text = "Therefore, we speculate that PEA3 factors may contribute to the up-regulation of COX-2 expression resulting from both APC mutation and Wnt1 expression"
|
135
|
+
path = SpaCy.paths(text, *Segment.align(text,["PEA3", "Wnt1"]))
|
136
|
+
|
137
|
+
path_tokens = path.collect do |segid|
|
138
|
+
range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
|
139
|
+
text[range]
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
31
143
|
end
|
32
144
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.4
|
4
|
+
version: 1.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: libxml-ruby
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: json
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,8 +58,10 @@ email: miguel.vazquez@fdi.ucm.es
|
|
72
58
|
executables:
|
73
59
|
- get_ppis.rb
|
74
60
|
extensions: []
|
75
|
-
extra_rdoc_files:
|
61
|
+
extra_rdoc_files:
|
62
|
+
- LICENSE
|
76
63
|
files:
|
64
|
+
- LICENSE
|
77
65
|
- bin/get_ppis.rb
|
78
66
|
- lib/rbbt/bow/bow.rb
|
79
67
|
- lib/rbbt/bow/dictionary.rb
|
@@ -95,6 +83,7 @@ files:
|
|
95
83
|
- lib/rbbt/ner/oscar4.rb
|
96
84
|
- lib/rbbt/ner/patterns.rb
|
97
85
|
- lib/rbbt/ner/regexpNER.rb
|
86
|
+
- lib/rbbt/ner/rner.rb
|
98
87
|
- lib/rbbt/ner/rnorm.rb
|
99
88
|
- lib/rbbt/ner/rnorm/cue_index.rb
|
100
89
|
- lib/rbbt/ner/rnorm/tokens.rb
|
@@ -103,6 +92,7 @@ files:
|
|
103
92
|
- lib/rbbt/nlp/nlp.rb
|
104
93
|
- lib/rbbt/nlp/open_nlp/sentence_splitter.rb
|
105
94
|
- lib/rbbt/nlp/spaCy.rb
|
95
|
+
- lib/rbbt/relationship.rb
|
106
96
|
- lib/rbbt/segment.rb
|
107
97
|
- lib/rbbt/segment/annotation.rb
|
108
98
|
- lib/rbbt/segment/encoding.rb
|
@@ -126,6 +116,7 @@ files:
|
|
126
116
|
- share/install/software/OpenNLP
|
127
117
|
- share/install/software/StanfordParser
|
128
118
|
- share/patterns/drug_induce_disease
|
119
|
+
- share/rner/config.rb
|
129
120
|
- share/rnorm/cue_default
|
130
121
|
- share/rnorm/tokens_default
|
131
122
|
- share/wordlists/stopwords
|
@@ -136,6 +127,7 @@ files:
|
|
136
127
|
- test/rbbt/document/test_annotation.rb
|
137
128
|
- test/rbbt/document/test_corpus.rb
|
138
129
|
- test/rbbt/entity/test_document.rb
|
130
|
+
- test/rbbt/ner/rnorm/test_tokens.rb
|
139
131
|
- test/rbbt/ner/test_NER.rb
|
140
132
|
- test/rbbt/ner/test_abner.rb
|
141
133
|
- test/rbbt/ner/test_banner.rb
|
@@ -148,6 +140,7 @@ files:
|
|
148
140
|
- test/rbbt/ner/test_oscar4.rb
|
149
141
|
- test/rbbt/ner/test_patterns.rb
|
150
142
|
- test/rbbt/ner/test_regexpNER.rb
|
143
|
+
- test/rbbt/ner/test_rner.rb
|
151
144
|
- test/rbbt/ner/test_rnorm.rb
|
152
145
|
- test/rbbt/ner/test_token_trieNER.rb
|
153
146
|
- test/rbbt/nlp/genia/test_sentence_splitter.rb
|
@@ -182,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
182
175
|
- !ruby/object:Gem::Version
|
183
176
|
version: '0'
|
184
177
|
requirements: []
|
185
|
-
rubygems_version: 3.
|
178
|
+
rubygems_version: 3.1.4
|
186
179
|
signing_key:
|
187
180
|
specification_version: 4
|
188
181
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
@@ -201,6 +194,7 @@ test_files:
|
|
201
194
|
- test/rbbt/ner/test_patterns.rb
|
202
195
|
- test/rbbt/ner/test_NER.rb
|
203
196
|
- test/rbbt/ner/test_abner.rb
|
197
|
+
- test/rbbt/ner/rnorm/test_tokens.rb
|
204
198
|
- test/rbbt/ner/test_rnorm.rb
|
205
199
|
- test/rbbt/ner/test_regexpNER.rb
|
206
200
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
@@ -210,6 +204,7 @@ test_files:
|
|
210
204
|
- test/rbbt/ner/test_banner.rb
|
211
205
|
- test/rbbt/ner/test_token_trieNER.rb
|
212
206
|
- test/rbbt/ner/test_finder.rb
|
207
|
+
- test/rbbt/ner/test_rner.rb
|
213
208
|
- test/rbbt/ner/test_linnaeus.rb
|
214
209
|
- test/rbbt/ner/test_oscar4.rb
|
215
210
|
- test/rbbt/test_segment.rb
|