rbbt-text 1.3.5 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0846f900d745dd27df8006eecbc9d294f9f38a23dd76001de2a5dc0313db7e22'
4
- data.tar.gz: 675985882a6c8b9813f620d7ef0a555efa5c148c7c2fe36e0030f84f3fd88cf0
3
+ metadata.gz: 110d05d205305e48b17ca63a91f8fef66ba6cb0d24639031614521c463dfde8f
4
+ data.tar.gz: 97df4097eb1a0cb645f4f6a0eeec9138f296760e09fcdc85a5ebebe6996640d0
5
5
  SHA512:
6
- metadata.gz: dfd9c333b94181496134b825c63d6e93a0390f81d426526f79c00cf12556021b60004b29b57ca9b0b274141937027f7bc780552a60de007e5f790b19910354c0
7
- data.tar.gz: 205beeb8829c8358fd29c0a18351522e566106e24220af3d7bec3676694d37d682b92243e4fd4cd495b542f9945a28cf8585e587342672d31779d0b21b53ae4e
6
+ metadata.gz: bc3c92ae7b5268f02b232f04d28a2b6491740a618c9a9f3d78a099d51ed03dee489ebb805485c82552fc38d8164a4eb75a8e19a6d1df53aaac536e99287009fb
7
+ data.tar.gz: 1bb1594b9cd5831455e1a7e2454932b57fdd7da4401bc2596146dc1669fa6783931df3042c9deed6a3cf032c59d8697500a470a8aeba859061868cd8ab8b4ab8
@@ -6,6 +6,7 @@ module Document::Corpus
6
6
  corpus = Persist.open_tokyocabinet(corpus, true, :single, "BDB") if String === corpus
7
7
  corpus.extend Document::Corpus unless Document::Corpus === corpus
8
8
  corpus.extend Persist::TSVAdapter unless Persist::TSVAdapter === corpus
9
+ corpus.close
9
10
  corpus
10
11
  end
11
12
 
@@ -1,18 +1,26 @@
1
1
  require 'rbbt/sources/pubmed'
2
2
 
3
3
  module Document::Corpus
4
- def add_pmid(pmid, type = nil)
4
+ PUBMED_NAMESPACE="PMID"
5
+ def add_pmid(pmid, type = nil, update = false)
6
+ type = :abstract if type.nil?
7
+ if update == false
8
+ id = [PUBMED_NAMESPACE, pmid, type].collect{|e| e.to_s}*":"
9
+ documents = self.documents(id)
10
+ return documents if documents.any?
11
+ end
12
+
5
13
  pmids = Array === pmid ? pmid : [pmid]
6
14
  type = nil if String === type and type.empty?
7
15
 
8
16
  res = PubMed.get_article(pmids).collect do |pmid, article|
9
- document = if type.nil? || type.to_sym == :abstract
10
- Document.setup(article.abstract || "", "PMID", pmid, :abstract, self, :corpus => self)
17
+ document = if type.to_sym == :abstract
18
+ Document.setup(article.abstract || "", PUBMED_NAMESPACE, pmid, :abstract, self, :corpus => self)
11
19
  elsif type.to_sym == :title
12
- Document.setup(article.title, :PMID, pmid, :title, self)
20
+ Document.setup(article.title, PUBMED_NAMESPACE, pmid, :title, self)
13
21
  else
14
22
  raise "No FullText available for #{ pmid }" if article.full_text.nil?
15
- Document.setup(article.full_text, :PMID, pmid, :fulltext, self, :corpus => self)
23
+ Document.setup(article.full_text, PUBMED_NAMESPACE, pmid, :fulltext, self, :corpus => self)
16
24
  end
17
25
  Log.debug "Loading pmid #{pmid}"
18
26
  add_document(document)
@@ -7,7 +7,7 @@ module OpenNLP
7
7
  Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find
8
8
 
9
9
 
10
- Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"
10
+ Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"
11
11
 
12
12
  MAX = 5
13
13
 
@@ -1,12 +1,6 @@
1
1
  #!/bin/bash
2
2
 
3
3
  name='OpenNLP'
4
- url="http://apache.rediris.es/opennlp/opennlp-1.9.2/apache-opennlp-1.9.2-bin.tar.gz"
4
+ url="http://apache.rediris.es/opennlp/opennlp-1.9.3/apache-opennlp-1.9.3-bin.tar.gz"
5
5
 
6
- get_src "$name" "$url"
7
- move_opt "$name"
8
-
9
-
10
- ln -sf "$OPT_DIR/$name/lib/"*.jar "$OPT_JAR_DIR/"
11
-
12
- clean_build
6
+ install_jar $name $url
@@ -8,7 +8,6 @@ class TestCorpusPubmed < Test::Unit::TestCase
8
8
  corpus = Document::Corpus.setup({})
9
9
 
10
10
  document = corpus.add_pmid("33359141", :abstract).first
11
- iii document.docid
12
11
  title = document.to(:title)
13
12
  assert title.include?("COVID-19")
14
13
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.5
4
+ version: 1.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-17 00:00:00.000000000 Z
11
+ date: 2021-06-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util