rbbt-text 1.3.10 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f10312d9b6598ddc9b6fa98b38909afdd575b33a497ae1ff3f17c7a9c6e37bf
4
- data.tar.gz: f79c61c7e34dd113a2c5002342c0c2df92a4a28c770394bf2c456a34a2730cc7
3
+ metadata.gz: 581a8bf4e03fad79e2650c65ac6c445d83f89a1d237114c91e8ba37b11c1c4f4
4
+ data.tar.gz: 58f2fb21eee7ac37ca30a771609f6b0c394a2b646690fbcd59b0d623261e0522
5
5
  SHA512:
6
- metadata.gz: a9fb4dc49c538a58a8aa04e81947df212668c5ef9097434fa7d3eff54dd17a8657f581451b64e6b247cb64428436823a305dd64ae6a5fed2126b92285c02ad81
7
- data.tar.gz: 0d31423660cd232102aa2b9914dab61ff929cf02a37b5094bd58481cac733c167d0e4fcdb4b3025e41a4775bd8033566ed3f402f66c317b3955406d1a3d3eb6f
6
+ metadata.gz: 646340e7dc850bbe4232f30e947f68b8801b51b6d3e0ded92f378534459993cf08c21885685fe4e11a171026f0b0f09d331fdbdc70d97e81579f0ad53f886ee2
7
+ data.tar.gz: 68b0095f69e08562a22763201c8175e5f6e10d9106118f9dd3b2920a0ae63111f6296dd03b66fd1ab6a5672898a8c4e87d2ca5190671e60ad44b9ea0e6dab78c
data/lib/rbbt/bow/misc.rb CHANGED
@@ -37,7 +37,7 @@ $greek.each{|l,s| $inverse_greek[s] = l }
37
37
 
38
38
  class String
39
39
  CONSONANTS = []
40
- if File.exists? File.join(Rbbt.datadir, 'wordlists/consonants')
40
+ if File.exist? File.join(Rbbt.datadir, 'wordlists/consonants')
41
41
  Object::Open.read(File.join(Rbbt.datadir, 'wordlists/consonants')).each_line{|l| CONSONANTS << l.chomp}
42
42
  end
43
43
 
@@ -20,13 +20,25 @@ module Document::Corpus
20
20
  elsif type.to_sym == :title
21
21
  Document.setup(article.title || "", PUBMED_NAMESPACE, pmid, type.to_sym, self)
22
22
  elsif type.to_sym == :title_and_abstract
23
- Document.setup((article.title || "") + "\n\n" + (article.abstract || ""), PUBMED_NAMESPACE, pmid, type.to_sym, self)
23
+ title = article.title
24
+ abstract = article.abstract
25
+
26
+ if title.nil? || title == ""
27
+ text = article.abstract
28
+ text = "" if text.nil?
29
+ else
30
+ title = title + "." unless title.end_with?(".")
31
+
32
+ text = title + " " + abstract if abstract && ! abstract.empty?
33
+ end
34
+
35
+ Document.setup(text, PUBMED_NAMESPACE, pmid, type.to_sym, self)
24
36
  else
25
37
  raise "No FullText available for #{ pmid }" if article.full_text.nil?
26
38
  Document.setup(article.full_text, PUBMED_NAMESPACE, pmid, :fulltext, self, :corpus => self)
27
39
  end
28
40
  Log.debug "Loading pmid #{pmid}"
29
- add_document(document)
41
+ add_document(document) if document
30
42
  document
31
43
  end
32
44
 
@@ -66,7 +66,7 @@ EOF
66
66
  end
67
67
 
68
68
  Open.write('config', CONFIG)
69
- mem = Rbbt::Config.get(:java_mem, :GNormPlus, :g_norm_plus, :gnormplus, :gnp, :default => "2G")
69
+ mem = Rbbt::Config.get(:java_mem, :GNormPlus, :g_norm_plus, :gnormplus, :gnp, :default => "8G")
70
70
  CMD.cmd_log("java -Xmx#{mem} -Xms#{mem} -jar '#{Rbbt.software.opt.GNormPlus.produce.find}/GNormPlus.jar' 'input' 'output' 'config'")
71
71
 
72
72
  if texts.respond_to? :key_field
@@ -1,7 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
3
  name='OpenNLP'
4
- url="http://apache.rediris.es/opennlp/opennlp-1.9.4/apache-opennlp-1.9.4-bin.tar.gz"
4
+ url="http://apache.rediris.es/opennlp/opennlp-2.3.0/apache-opennlp-2.3.0-bin.tar.gz"
5
5
 
6
6
  install_src $name $url
7
7
  (cd $OPT_DIR/jars; ln -s $OPT_DIR/$name/lib/*.jar .)
@@ -3,7 +3,7 @@ require 'rbbt/ner/g_norm_plus'
3
3
 
4
4
  Log.severity = 0
5
5
  class TestGNormPlus < Test::Unit::TestCase
6
- def test_match
6
+ def _test_match
7
7
  text =<<-EOF
8
8
 
9
9
  Introduction
@@ -12,7 +12,7 @@ We found that TP53 is regulated by MDM2 in Homo
12
12
  sapiens
13
13
  EOF
14
14
 
15
- Rbbt::Config.add_entry :java_mem, "2G", :gnp
15
+ Rbbt::Config.add_entry :java_mem, "16G", :gnp
16
16
  mentions = GNormPlus.process({:file => text})
17
17
 
18
18
  assert_equal 1, mentions.length
@@ -24,7 +24,7 @@ sapiens
24
24
  We found that TP53 is regulated by MDM2 in Homo sapiens
25
25
  EOF
26
26
 
27
- Rbbt::Config.add_entry :java_mem, "2G", :gnp
27
+ Rbbt::Config.add_entry :java_mem, "16G", :gnp
28
28
  mentions = GNormPlus.entities({:file => text})
29
29
  assert mentions["file"].include?("TP53")
30
30
  mentions["file"].each do |mention|
data/test/test_helper.rb CHANGED
@@ -15,7 +15,11 @@ class Test::Unit::TestCase
15
15
 
16
16
  def setup
17
17
  FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
18
- Persist.cachedir = Rbbt.tmp.test.persistence.find :user
18
+ begin
19
+ Persist.cachedir = Rbbt.tmp.test.persistence.find :user
20
+ rescue
21
+ Persist.cache_dir = Rbbt.tmp.test.persistence.find :user
22
+ end
19
23
  end
20
24
 
21
25
  def teardown
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.10
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-28 00:00:00.000000000 Z
11
+ date: 2023-12-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -175,45 +175,45 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
175
  - !ruby/object:Gem::Version
176
176
  version: '0'
177
177
  requirements: []
178
- rubygems_version: 3.1.2
178
+ rubygems_version: 3.5.0.dev
179
179
  signing_key:
180
180
  specification_version: 4
181
181
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
182
182
  test_files:
183
- - test/test_spaCy.rb
184
- - test/test_helper.rb
185
- - test/rbbt/bow/test_dictionary.rb
186
183
  - test/rbbt/bow/test_bow.rb
184
+ - test/rbbt/bow/test_dictionary.rb
187
185
  - test/rbbt/bow/test_misc.rb
188
- - test/rbbt/segment/test_encoding.rb
189
- - test/rbbt/segment/test_transformed.rb
190
- - test/rbbt/segment/test_overlaps.rb
191
- - test/rbbt/segment/test_named_entity.rb
192
- - test/rbbt/segment/test_corpus.rb
193
- - test/rbbt/segment/test_range_index.rb
194
- - test/rbbt/segment/test_annotation.rb
195
- - test/rbbt/entity/test_document.rb
196
186
  - test/rbbt/document/corpus/test_pubmed.rb
197
- - test/rbbt/document/test_corpus.rb
198
187
  - test/rbbt/document/test_annotation.rb
199
- - test/rbbt/test_document.rb
200
- - test/rbbt/ner/test_patterns.rb
188
+ - test/rbbt/document/test_corpus.rb
189
+ - test/rbbt/entity/test_document.rb
201
190
  - test/rbbt/ner/rnorm/test_tokens.rb
202
- - test/rbbt/ner/test_ngram_prefix_dictionary.rb
203
- - test/rbbt/ner/test_token_trieNER.rb
204
- - test/rbbt/ner/test_finder.rb
191
+ - test/rbbt/ner/test_NER.rb
192
+ - test/rbbt/ner/test_abner.rb
193
+ - test/rbbt/ner/test_banner.rb
205
194
  - test/rbbt/ner/test_brat.rb
206
- - test/rbbt/ner/test_regexpNER.rb
195
+ - test/rbbt/ner/test_chemical_tagger.rb
196
+ - test/rbbt/ner/test_finder.rb
207
197
  - test/rbbt/ner/test_g_norm_plus.rb
208
- - test/rbbt/ner/test_rnorm.rb
209
198
  - test/rbbt/ner/test_linnaeus.rb
210
- - test/rbbt/ner/test_chemical_tagger.rb
211
- - test/rbbt/ner/test_NER.rb
212
- - test/rbbt/ner/test_abner.rb
213
- - test/rbbt/ner/test_rner.rb
199
+ - test/rbbt/ner/test_ngram_prefix_dictionary.rb
214
200
  - test/rbbt/ner/test_oscar4.rb
215
- - test/rbbt/ner/test_banner.rb
216
- - test/rbbt/test_segment.rb
201
+ - test/rbbt/ner/test_patterns.rb
202
+ - test/rbbt/ner/test_regexpNER.rb
203
+ - test/rbbt/ner/test_rner.rb
204
+ - test/rbbt/ner/test_rnorm.rb
205
+ - test/rbbt/ner/test_token_trieNER.rb
206
+ - test/rbbt/nlp/genia/test_sentence_splitter.rb
217
207
  - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
218
208
  - test/rbbt/nlp/test_nlp.rb
219
- - test/rbbt/nlp/genia/test_sentence_splitter.rb
209
+ - test/rbbt/segment/test_annotation.rb
210
+ - test/rbbt/segment/test_corpus.rb
211
+ - test/rbbt/segment/test_encoding.rb
212
+ - test/rbbt/segment/test_named_entity.rb
213
+ - test/rbbt/segment/test_overlaps.rb
214
+ - test/rbbt/segment/test_range_index.rb
215
+ - test/rbbt/segment/test_transformed.rb
216
+ - test/rbbt/test_document.rb
217
+ - test/rbbt/test_segment.rb
218
+ - test/test_helper.rb
219
+ - test/test_spaCy.rb