rbbt-text 1.3.10 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/bow/misc.rb +1 -1
- data/lib/rbbt/document/corpus/pubmed.rb +14 -2
- data/lib/rbbt/ner/g_norm_plus.rb +1 -1
- data/share/install/software/OpenNLP +1 -1
- data/test/rbbt/ner/test_g_norm_plus.rb +3 -3
- data/test/test_helper.rb +5 -1
- metadata +29 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 581a8bf4e03fad79e2650c65ac6c445d83f89a1d237114c91e8ba37b11c1c4f4
|
4
|
+
data.tar.gz: 58f2fb21eee7ac37ca30a771609f6b0c394a2b646690fbcd59b0d623261e0522
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 646340e7dc850bbe4232f30e947f68b8801b51b6d3e0ded92f378534459993cf08c21885685fe4e11a171026f0b0f09d331fdbdc70d97e81579f0ad53f886ee2
|
7
|
+
data.tar.gz: 68b0095f69e08562a22763201c8175e5f6e10d9106118f9dd3b2920a0ae63111f6296dd03b66fd1ab6a5672898a8c4e87d2ca5190671e60ad44b9ea0e6dab78c
|
data/lib/rbbt/bow/misc.rb
CHANGED
@@ -37,7 +37,7 @@ $greek.each{|l,s| $inverse_greek[s] = l }
|
|
37
37
|
|
38
38
|
class String
|
39
39
|
CONSONANTS = []
|
40
|
-
if File.
|
40
|
+
if File.exist? File.join(Rbbt.datadir, 'wordlists/consonants')
|
41
41
|
Object::Open.read(File.join(Rbbt.datadir, 'wordlists/consonants')).each_line{|l| CONSONANTS << l.chomp}
|
42
42
|
end
|
43
43
|
|
data/lib/rbbt/document/corpus/pubmed.rb
CHANGED
@@ -20,13 +20,25 @@ module Document::Corpus
|
|
20
20
|
elsif type.to_sym == :title
|
21
21
|
Document.setup(article.title || "", PUBMED_NAMESPACE, pmid, type.to_sym, self)
|
22
22
|
elsif type.to_sym == :title_and_abstract
|
23
|
-
|
23
|
+
title = article.title
|
24
|
+
abstract = article.abstract
|
25
|
+
|
26
|
+
if title.nil? || title == ""
|
27
|
+
text = article.abstract
|
28
|
+
text = "" if text.nil?
|
29
|
+
else
|
30
|
+
title = title + "." unless title.end_with?(".")
|
31
|
+
|
32
|
+
text = title + " " + abstract if abstract && ! abstract.empty?
|
33
|
+
end
|
34
|
+
|
35
|
+
Document.setup(text, PUBMED_NAMESPACE, pmid, type.to_sym, self)
|
24
36
|
else
|
25
37
|
raise "No FullText available for #{ pmid }" if article.full_text.nil?
|
26
38
|
Document.setup(article.full_text, PUBMED_NAMESPACE, pmid, :fulltext, self, :corpus => self)
|
27
39
|
end
|
28
40
|
Log.debug "Loading pmid #{pmid}"
|
29
|
-
add_document(document)
|
41
|
+
add_document(document) if document
|
30
42
|
document
|
31
43
|
end
|
32
44
|
|
data/lib/rbbt/ner/g_norm_plus.rb
CHANGED
@@ -66,7 +66,7 @@ EOF
|
|
66
66
|
end
|
67
67
|
|
68
68
|
Open.write('config', CONFIG)
|
69
|
-
mem = Rbbt::Config.get(:java_mem, :GNormPlus, :g_norm_plus, :gnormplus, :gnp, :default => "
|
69
|
+
mem = Rbbt::Config.get(:java_mem, :GNormPlus, :g_norm_plus, :gnormplus, :gnp, :default => "8G")
|
70
70
|
CMD.cmd_log("java -Xmx#{mem} -Xms#{mem} -jar '#{Rbbt.software.opt.GNormPlus.produce.find}/GNormPlus.jar' 'input' 'output' 'config'")
|
71
71
|
|
72
72
|
if texts.respond_to? :key_field
|
data/share/install/software/OpenNLP
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
|
3
3
|
name='OpenNLP'
|
4
|
-
url="http://apache.rediris.es/opennlp/opennlp-
|
4
|
+
url="http://apache.rediris.es/opennlp/opennlp-2.3.0/apache-opennlp-2.3.0-bin.tar.gz"
|
5
5
|
|
6
6
|
install_src $name $url
|
7
7
|
(cd $OPT_DIR/jars; ln -s $OPT_DIR/$name/lib/*.jar .)
|
data/test/rbbt/ner/test_g_norm_plus.rb
CHANGED
@@ -3,7 +3,7 @@ require 'rbbt/ner/g_norm_plus'
|
|
3
3
|
|
4
4
|
Log.severity = 0
|
5
5
|
class TestGNormPlus < Test::Unit::TestCase
|
6
|
-
def
|
6
|
+
def _test_match
|
7
7
|
text =<<-EOF
|
8
8
|
|
9
9
|
Introduction
|
@@ -12,7 +12,7 @@ We found that TP53 is regulated by MDM2 in Homo
|
|
12
12
|
sapiens
|
13
13
|
EOF
|
14
14
|
|
15
|
-
Rbbt::Config.add_entry :java_mem, "
|
15
|
+
Rbbt::Config.add_entry :java_mem, "16G", :gnp
|
16
16
|
mentions = GNormPlus.process({:file => text})
|
17
17
|
|
18
18
|
assert_equal 1, mentions.length
|
@@ -24,7 +24,7 @@ sapiens
|
|
24
24
|
We found that TP53 is regulated by MDM2 in Homo sapiens
|
25
25
|
EOF
|
26
26
|
|
27
|
-
Rbbt::Config.add_entry :java_mem, "
|
27
|
+
Rbbt::Config.add_entry :java_mem, "16G", :gnp
|
28
28
|
mentions = GNormPlus.entities({:file => text})
|
29
29
|
assert mentions["file"].include?("TP53")
|
30
30
|
mentions["file"].each do |mention|
|
data/test/test_helper.rb
CHANGED
@@ -15,7 +15,11 @@ class Test::Unit::TestCase
|
|
15
15
|
|
16
16
|
def setup
|
17
17
|
FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
|
18
|
-
|
18
|
+
begin
|
19
|
+
Persist.cachedir = Rbbt.tmp.test.persistence.find :user
|
20
|
+
rescue
|
21
|
+
Persist.cache_dir = Rbbt.tmp.test.persistence.find :user
|
22
|
+
end
|
19
23
|
end
|
20
24
|
|
21
25
|
def teardown
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -175,45 +175,45 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
175
|
- !ruby/object:Gem::Version
|
176
176
|
version: '0'
|
177
177
|
requirements: []
|
178
|
-
rubygems_version: 3.
|
178
|
+
rubygems_version: 3.5.0.dev
|
179
179
|
signing_key:
|
180
180
|
specification_version: 4
|
181
181
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
182
182
|
test_files:
|
183
|
-
- test/test_spaCy.rb
|
184
|
-
- test/test_helper.rb
|
185
|
-
- test/rbbt/bow/test_dictionary.rb
|
186
183
|
- test/rbbt/bow/test_bow.rb
|
184
|
+
- test/rbbt/bow/test_dictionary.rb
|
187
185
|
- test/rbbt/bow/test_misc.rb
|
188
|
-
- test/rbbt/segment/test_encoding.rb
|
189
|
-
- test/rbbt/segment/test_transformed.rb
|
190
|
-
- test/rbbt/segment/test_overlaps.rb
|
191
|
-
- test/rbbt/segment/test_named_entity.rb
|
192
|
-
- test/rbbt/segment/test_corpus.rb
|
193
|
-
- test/rbbt/segment/test_range_index.rb
|
194
|
-
- test/rbbt/segment/test_annotation.rb
|
195
|
-
- test/rbbt/entity/test_document.rb
|
196
186
|
- test/rbbt/document/corpus/test_pubmed.rb
|
197
|
-
- test/rbbt/document/test_corpus.rb
|
198
187
|
- test/rbbt/document/test_annotation.rb
|
199
|
-
- test/rbbt/
|
200
|
-
- test/rbbt/
|
188
|
+
- test/rbbt/document/test_corpus.rb
|
189
|
+
- test/rbbt/entity/test_document.rb
|
201
190
|
- test/rbbt/ner/rnorm/test_tokens.rb
|
202
|
-
- test/rbbt/ner/
|
203
|
-
- test/rbbt/ner/
|
204
|
-
- test/rbbt/ner/
|
191
|
+
- test/rbbt/ner/test_NER.rb
|
192
|
+
- test/rbbt/ner/test_abner.rb
|
193
|
+
- test/rbbt/ner/test_banner.rb
|
205
194
|
- test/rbbt/ner/test_brat.rb
|
206
|
-
- test/rbbt/ner/
|
195
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
196
|
+
- test/rbbt/ner/test_finder.rb
|
207
197
|
- test/rbbt/ner/test_g_norm_plus.rb
|
208
|
-
- test/rbbt/ner/test_rnorm.rb
|
209
198
|
- test/rbbt/ner/test_linnaeus.rb
|
210
|
-
- test/rbbt/ner/
|
211
|
-
- test/rbbt/ner/test_NER.rb
|
212
|
-
- test/rbbt/ner/test_abner.rb
|
213
|
-
- test/rbbt/ner/test_rner.rb
|
199
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
214
200
|
- test/rbbt/ner/test_oscar4.rb
|
215
|
-
- test/rbbt/ner/
|
216
|
-
- test/rbbt/
|
201
|
+
- test/rbbt/ner/test_patterns.rb
|
202
|
+
- test/rbbt/ner/test_regexpNER.rb
|
203
|
+
- test/rbbt/ner/test_rner.rb
|
204
|
+
- test/rbbt/ner/test_rnorm.rb
|
205
|
+
- test/rbbt/ner/test_token_trieNER.rb
|
206
|
+
- test/rbbt/nlp/genia/test_sentence_splitter.rb
|
217
207
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
218
208
|
- test/rbbt/nlp/test_nlp.rb
|
219
|
-
- test/rbbt/
|
209
|
+
- test/rbbt/segment/test_annotation.rb
|
210
|
+
- test/rbbt/segment/test_corpus.rb
|
211
|
+
- test/rbbt/segment/test_encoding.rb
|
212
|
+
- test/rbbt/segment/test_named_entity.rb
|
213
|
+
- test/rbbt/segment/test_overlaps.rb
|
214
|
+
- test/rbbt/segment/test_range_index.rb
|
215
|
+
- test/rbbt/segment/test_transformed.rb
|
216
|
+
- test/rbbt/test_document.rb
|
217
|
+
- test/rbbt/test_segment.rb
|
218
|
+
- test/test_helper.rb
|
219
|
+
- test/test_spaCy.rb
|