rbbt-text 1.3.4 → 1.3.7

Sign up to get free protection for your applications and to get access to all the features.
data/test/test_spaCy.rb CHANGED
@@ -3,7 +3,7 @@ require 'rbbt/nlp/spaCy'
3
3
  require 'rbbt/document/corpus'
4
4
 
5
5
  class TestSpaCy < Test::Unit::TestCase
6
- def _test_tokens
6
+ def test_tokens
7
7
  text = "I tell a story"
8
8
 
9
9
  tokens = SpaCy.tokens(text)
@@ -12,6 +12,16 @@ class TestSpaCy < Test::Unit::TestCase
12
12
  assert_equal "tell", tokens[1].to_s
13
13
  end
14
14
 
15
+ def test_chunks
16
+ text = "Miguel Vazquez tell a good story"
17
+
18
+ tokens = SpaCy.chunks(text)
19
+
20
+ assert_equal 2, tokens.length
21
+ assert_equal "Miguel Vazquez", tokens[0].to_s
22
+ end
23
+
24
+
15
25
  def test_segments
16
26
  text = "I tell a story. It's a very good story."
17
27
 
@@ -28,5 +38,107 @@ class TestSpaCy < Test::Unit::TestCase
28
38
  assert_equal segment, segment.segid.tap{|e| e.corpus = corpus}.segment
29
39
  end
30
40
  end
41
+
42
+ def test_chunk_segments
43
+ text = "I tell a story. It's a very good story."
44
+
45
+ corpus = Document::Corpus.setup({})
46
+
47
+ Document.setup(text, "TEST", "test_doc1", "simple_sentence")
48
+
49
+ corpus.add_document text
50
+ text.corpus = corpus
51
+
52
+ segments = SpaCy.chunk_segments(text)
53
+
54
+ segments.each do |segment|
55
+ assert_equal segment, segment.segid.tap{|e| e.corpus = corpus}.segment
56
+ end
57
+ end
58
+
59
+ def test_dep_graph
60
+ text = "Meanwhile, TF antisense treatment activated the human ASBT promoter 5-fold and not only abrogated interleukin-1beta-mediated repression but led to a paradoxical increase in TG promoter activity"
61
+ graph = SpaCy.dep_graph(text, true)
62
+
63
+ tokens = SpaCy.segments(text)
64
+ index = Segment.index tokens
65
+ tf_s = tokens.select{|t| t == "TF" }.first
66
+ tg_s = tokens.select{|t| t == "TG" }.first
67
+
68
+ require 'rbbt/network/paths'
69
+
70
+ path = Paths.dijkstra(graph, tf_s.segid, [tg_s.segid])
71
+ path_tokens = path.collect do |segid|
72
+ range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
73
+ text[range]
74
+ end
75
+
76
+ assert path_tokens.include? 'increase'
77
+
78
+ end
79
+
80
+ def test_chunk_dep_graph
81
+ text = "Meanwhile, TF antisense treatment activated the human ASBT promoter 5-fold and not only abrogated interleukin-1beta-mediated repression but led to a paradoxical increase in TG promoter activity"
82
+ graph = SpaCy.chunk_dep_graph(text, true)
83
+
84
+ tokens = SpaCy.chunk_segments(text)
85
+ index = Segment.index tokens
86
+ tf_s = tokens.select{|t| t.include? "TF" }.first
87
+ tg_s = tokens.select{|t| t.include? "TG" }.first
88
+
89
+
90
+ require 'rbbt/network/paths'
91
+
92
+ path = Paths.dijkstra(graph, tf_s.segid, [tg_s.segid])
93
+ path_tokens = path.collect do |segid|
94
+ range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
95
+ text[range]
96
+ end
97
+
98
+ assert path_tokens.include? 'increase'
99
+ end
100
+
101
+ def test_paths
102
+ text = "Meanwhile, TF antisense treatment activated the human ASBT promoter 5-fold and not only abrogated interleukin-1beta-mediated repression but led to a paradoxical increase in TG promoter activity"
103
+ path = SpaCy.paths(text, Segment.setup("TF", :offset => text.index("TF")), Segment.setup("TG",:offset => text.index("TG")))
104
+
105
+
106
+ path_tokens = path.collect do |segid|
107
+ range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
108
+ text[range]
109
+ end
110
+
111
+ ppp text
112
+ iii path_tokens
113
+
114
+ assert path_tokens.include? 'increase'
115
+ end
116
+
117
+ def test_paths2
118
+ text = "Deletion and domain swap experiments identified small, discreet positive and negative elements in A-Myb and TF that were required for the regulation of specific genes, such as DHRS2, TG, and mim-1"
119
+ path = SpaCy.paths(text, Segment.setup("TF", :offset => text.index("TF")), Segment.setup("TG",:offset => text.index("TG")))
120
+
121
+
122
+ path_tokens = path.collect do |segid|
123
+ range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
124
+ text[range]
125
+ end
126
+
127
+ iii path_tokens
128
+
129
+
130
+ assert path_tokens.include? 'regulation'
131
+ end
132
+
133
+ def test_paths3
134
+ text = "Therefore, we speculate that PEA3 factors may contribute to the up-regulation of COX-2 expression resulting from both APC mutation and Wnt1 expression"
135
+ path = SpaCy.paths(text, *Segment.align(text,["PEA3", "Wnt1"]))
136
+
137
+ path_tokens = path.collect do |segid|
138
+ range = Range.new(*segid.split(":").last.split("..").map(&:to_i))
139
+ text[range]
140
+ end
141
+
142
+ end
31
143
  end
32
144
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.4
4
+ version: 1.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-23 00:00:00.000000000 Z
11
+ date: 2022-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: libxml-ruby
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: json
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -72,8 +58,10 @@ email: miguel.vazquez@fdi.ucm.es
72
58
  executables:
73
59
  - get_ppis.rb
74
60
  extensions: []
75
- extra_rdoc_files: []
61
+ extra_rdoc_files:
62
+ - LICENSE
76
63
  files:
64
+ - LICENSE
77
65
  - bin/get_ppis.rb
78
66
  - lib/rbbt/bow/bow.rb
79
67
  - lib/rbbt/bow/dictionary.rb
@@ -95,6 +83,7 @@ files:
95
83
  - lib/rbbt/ner/oscar4.rb
96
84
  - lib/rbbt/ner/patterns.rb
97
85
  - lib/rbbt/ner/regexpNER.rb
86
+ - lib/rbbt/ner/rner.rb
98
87
  - lib/rbbt/ner/rnorm.rb
99
88
  - lib/rbbt/ner/rnorm/cue_index.rb
100
89
  - lib/rbbt/ner/rnorm/tokens.rb
@@ -103,6 +92,7 @@ files:
103
92
  - lib/rbbt/nlp/nlp.rb
104
93
  - lib/rbbt/nlp/open_nlp/sentence_splitter.rb
105
94
  - lib/rbbt/nlp/spaCy.rb
95
+ - lib/rbbt/relationship.rb
106
96
  - lib/rbbt/segment.rb
107
97
  - lib/rbbt/segment/annotation.rb
108
98
  - lib/rbbt/segment/encoding.rb
@@ -126,6 +116,7 @@ files:
126
116
  - share/install/software/OpenNLP
127
117
  - share/install/software/StanfordParser
128
118
  - share/patterns/drug_induce_disease
119
+ - share/rner/config.rb
129
120
  - share/rnorm/cue_default
130
121
  - share/rnorm/tokens_default
131
122
  - share/wordlists/stopwords
@@ -136,6 +127,7 @@ files:
136
127
  - test/rbbt/document/test_annotation.rb
137
128
  - test/rbbt/document/test_corpus.rb
138
129
  - test/rbbt/entity/test_document.rb
130
+ - test/rbbt/ner/rnorm/test_tokens.rb
139
131
  - test/rbbt/ner/test_NER.rb
140
132
  - test/rbbt/ner/test_abner.rb
141
133
  - test/rbbt/ner/test_banner.rb
@@ -148,6 +140,7 @@ files:
148
140
  - test/rbbt/ner/test_oscar4.rb
149
141
  - test/rbbt/ner/test_patterns.rb
150
142
  - test/rbbt/ner/test_regexpNER.rb
143
+ - test/rbbt/ner/test_rner.rb
151
144
  - test/rbbt/ner/test_rnorm.rb
152
145
  - test/rbbt/ner/test_token_trieNER.rb
153
146
  - test/rbbt/nlp/genia/test_sentence_splitter.rb
@@ -182,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
175
  - !ruby/object:Gem::Version
183
176
  version: '0'
184
177
  requirements: []
185
- rubygems_version: 3.0.6
178
+ rubygems_version: 3.1.4
186
179
  signing_key:
187
180
  specification_version: 4
188
181
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
@@ -201,6 +194,7 @@ test_files:
201
194
  - test/rbbt/ner/test_patterns.rb
202
195
  - test/rbbt/ner/test_NER.rb
203
196
  - test/rbbt/ner/test_abner.rb
197
+ - test/rbbt/ner/rnorm/test_tokens.rb
204
198
  - test/rbbt/ner/test_rnorm.rb
205
199
  - test/rbbt/ner/test_regexpNER.rb
206
200
  - test/rbbt/ner/test_ngram_prefix_dictionary.rb
@@ -210,6 +204,7 @@ test_files:
210
204
  - test/rbbt/ner/test_banner.rb
211
205
  - test/rbbt/ner/test_token_trieNER.rb
212
206
  - test/rbbt/ner/test_finder.rb
207
+ - test/rbbt/ner/test_rner.rb
213
208
  - test/rbbt/ner/test_linnaeus.rb
214
209
  - test/rbbt/ner/test_oscar4.rb
215
210
  - test/rbbt/test_segment.rb