rbbt-text 1.3.11 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 587b7971fd0f8fec2602323ea480521445ca417c3a407d057cf95df1f3a36216
4
- data.tar.gz: 4f66306f80e838a0a27299705d79e5856b38dd936005d2b18004539bbb192431
3
+ metadata.gz: 581a8bf4e03fad79e2650c65ac6c445d83f89a1d237114c91e8ba37b11c1c4f4
4
+ data.tar.gz: 58f2fb21eee7ac37ca30a771609f6b0c394a2b646690fbcd59b0d623261e0522
5
5
  SHA512:
6
- metadata.gz: 4aa191aa4e5cb5e3f7d4a49b30beb1eb7259a34074a7521a10b9951cd1cc7a097a06ba6a97d9f4f4e100b2058de3f94f5199cb069a030f93a3f69bf1ecec09ff
7
- data.tar.gz: f3d5eb11d12f8a9d951d1073abd7e6cb5ace99bd075e7dc897f0aa715ae1552271019b4eb6849a172529d830f30bbc09ce40dd351fd81f9c06f338b075523e36
6
+ metadata.gz: 646340e7dc850bbe4232f30e947f68b8801b51b6d3e0ded92f378534459993cf08c21885685fe4e11a171026f0b0f09d331fdbdc70d97e81579f0ad53f886ee2
7
+ data.tar.gz: 68b0095f69e08562a22763201c8175e5f6e10d9106118f9dd3b2920a0ae63111f6296dd03b66fd1ab6a5672898a8c4e87d2ca5190671e60ad44b9ea0e6dab78c
@@ -20,13 +20,25 @@ module Document::Corpus
20
20
  elsif type.to_sym == :title
21
21
  Document.setup(article.title || "", PUBMED_NAMESPACE, pmid, type.to_sym, self)
22
22
  elsif type.to_sym == :title_and_abstract
23
- Document.setup((article.title || "") + " " + (article.abstract || ""), PUBMED_NAMESPACE, pmid, type.to_sym, self)
23
+ title = article.title
24
+ abstract = article.abstract
25
+
26
+ if title.nil? || title == ""
27
+ text = article.abstract
28
+ text = "" if text.nil?
29
+ else
30
+ title = title + "." unless title.end_with?(".")
31
+
32
+ text = title + " " + abstract if abstract && ! abstract.empty?
33
+ end
34
+
35
+ Document.setup(text, PUBMED_NAMESPACE, pmid, type.to_sym, self)
24
36
  else
25
37
  raise "No FullText available for #{ pmid }" if article.full_text.nil?
26
38
  Document.setup(article.full_text, PUBMED_NAMESPACE, pmid, :fulltext, self, :corpus => self)
27
39
  end
28
40
  Log.debug "Loading pmid #{pmid}"
29
- add_document(document)
41
+ add_document(document) if document
30
42
  document
31
43
  end
32
44
 
@@ -66,7 +66,7 @@ EOF
66
66
  end
67
67
 
68
68
  Open.write('config', CONFIG)
69
- mem = Rbbt::Config.get(:java_mem, :GNormPlus, :g_norm_plus, :gnormplus, :gnp, :default => "2G")
69
+ mem = Rbbt::Config.get(:java_mem, :GNormPlus, :g_norm_plus, :gnormplus, :gnp, :default => "8G")
70
70
  CMD.cmd_log("java -Xmx#{mem} -Xms#{mem} -jar '#{Rbbt.software.opt.GNormPlus.produce.find}/GNormPlus.jar' 'input' 'output' 'config'")
71
71
 
72
72
  if texts.respond_to? :key_field
@@ -1,7 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
3
  name='OpenNLP'
4
- url="http://apache.rediris.es/opennlp/opennlp-1.9.4/apache-opennlp-1.9.4-bin.tar.gz"
4
+ url="http://apache.rediris.es/opennlp/opennlp-2.3.0/apache-opennlp-2.3.0-bin.tar.gz"
5
5
 
6
6
  install_src $name $url
7
7
  (cd $OPT_DIR/jars; ln -s $OPT_DIR/$name/lib/*.jar .)
@@ -3,7 +3,7 @@ require 'rbbt/ner/g_norm_plus'
3
3
 
4
4
  Log.severity = 0
5
5
  class TestGNormPlus < Test::Unit::TestCase
6
- def test_match
6
+ def _test_match
7
7
  text =<<-EOF
8
8
 
9
9
  Introduction
@@ -12,7 +12,7 @@ We found that TP53 is regulated by MDM2 in Homo
12
12
  sapiens
13
13
  EOF
14
14
 
15
- Rbbt::Config.add_entry :java_mem, "2G", :gnp
15
+ Rbbt::Config.add_entry :java_mem, "16G", :gnp
16
16
  mentions = GNormPlus.process({:file => text})
17
17
 
18
18
  assert_equal 1, mentions.length
@@ -24,7 +24,7 @@ sapiens
24
24
  We found that TP53 is regulated by MDM2 in Homo sapiens
25
25
  EOF
26
26
 
27
- Rbbt::Config.add_entry :java_mem, "2G", :gnp
27
+ Rbbt::Config.add_entry :java_mem, "16G", :gnp
28
28
  mentions = GNormPlus.entities({:file => text})
29
29
  assert mentions["file"].include?("TP53")
30
30
  mentions["file"].each do |mention|
data/test/test_helper.rb CHANGED
@@ -15,7 +15,11 @@ class Test::Unit::TestCase
15
15
 
16
16
  def setup
17
17
  FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
18
- Persist.cachedir = Rbbt.tmp.test.persistence.find :user
18
+ begin
19
+ Persist.cachedir = Rbbt.tmp.test.persistence.find :user
20
+ rescue
21
+ Persist.cache_dir = Rbbt.tmp.test.persistence.find :user
22
+ end
19
23
  end
20
24
 
21
25
  def teardown
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.11
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-11 00:00:00.000000000 Z
11
+ date: 2023-12-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -175,45 +175,45 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
175
  - !ruby/object:Gem::Version
176
176
  version: '0'
177
177
  requirements: []
178
- rubygems_version: 3.4.8
178
+ rubygems_version: 3.5.0.dev
179
179
  signing_key:
180
180
  specification_version: 4
181
181
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
182
182
  test_files:
183
- - test/rbbt/nlp/test_nlp.rb
184
- - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
185
- - test/rbbt/nlp/genia/test_sentence_splitter.rb
186
183
  - test/rbbt/bow/test_bow.rb
187
- - test/rbbt/bow/test_misc.rb
188
184
  - test/rbbt/bow/test_dictionary.rb
189
- - test/rbbt/test_document.rb
190
- - test/rbbt/document/test_annotation.rb
185
+ - test/rbbt/bow/test_misc.rb
191
186
  - test/rbbt/document/corpus/test_pubmed.rb
187
+ - test/rbbt/document/test_annotation.rb
192
188
  - test/rbbt/document/test_corpus.rb
193
189
  - test/rbbt/entity/test_document.rb
194
- - test/rbbt/ner/test_patterns.rb
190
+ - test/rbbt/ner/rnorm/test_tokens.rb
195
191
  - test/rbbt/ner/test_NER.rb
196
192
  - test/rbbt/ner/test_abner.rb
197
- - test/rbbt/ner/rnorm/test_tokens.rb
198
- - test/rbbt/ner/test_rnorm.rb
199
- - test/rbbt/ner/test_regexpNER.rb
200
- - test/rbbt/ner/test_ngram_prefix_dictionary.rb
193
+ - test/rbbt/ner/test_banner.rb
201
194
  - test/rbbt/ner/test_brat.rb
202
- - test/rbbt/ner/test_g_norm_plus.rb
203
195
  - test/rbbt/ner/test_chemical_tagger.rb
204
- - test/rbbt/ner/test_banner.rb
205
- - test/rbbt/ner/test_token_trieNER.rb
206
196
  - test/rbbt/ner/test_finder.rb
207
- - test/rbbt/ner/test_rner.rb
197
+ - test/rbbt/ner/test_g_norm_plus.rb
208
198
  - test/rbbt/ner/test_linnaeus.rb
199
+ - test/rbbt/ner/test_ngram_prefix_dictionary.rb
209
200
  - test/rbbt/ner/test_oscar4.rb
210
- - test/rbbt/test_segment.rb
211
- - test/rbbt/segment/test_transformed.rb
212
- - test/rbbt/segment/test_overlaps.rb
201
+ - test/rbbt/ner/test_patterns.rb
202
+ - test/rbbt/ner/test_regexpNER.rb
203
+ - test/rbbt/ner/test_rner.rb
204
+ - test/rbbt/ner/test_rnorm.rb
205
+ - test/rbbt/ner/test_token_trieNER.rb
206
+ - test/rbbt/nlp/genia/test_sentence_splitter.rb
207
+ - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
208
+ - test/rbbt/nlp/test_nlp.rb
213
209
  - test/rbbt/segment/test_annotation.rb
214
- - test/rbbt/segment/test_named_entity.rb
210
+ - test/rbbt/segment/test_corpus.rb
215
211
  - test/rbbt/segment/test_encoding.rb
212
+ - test/rbbt/segment/test_named_entity.rb
213
+ - test/rbbt/segment/test_overlaps.rb
216
214
  - test/rbbt/segment/test_range_index.rb
217
- - test/rbbt/segment/test_corpus.rb
218
- - test/test_spaCy.rb
215
+ - test/rbbt/segment/test_transformed.rb
216
+ - test/rbbt/test_document.rb
217
+ - test/rbbt/test_segment.rb
219
218
  - test/test_helper.rb
219
+ - test/test_spaCy.rb