rbbt-text 1.3.5 → 1.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0846f900d745dd27df8006eecbc9d294f9f38a23dd76001de2a5dc0313db7e22'
4
- data.tar.gz: 675985882a6c8b9813f620d7ef0a555efa5c148c7c2fe36e0030f84f3fd88cf0
3
+ metadata.gz: 110d05d205305e48b17ca63a91f8fef66ba6cb0d24639031614521c463dfde8f
4
+ data.tar.gz: 97df4097eb1a0cb645f4f6a0eeec9138f296760e09fcdc85a5ebebe6996640d0
5
5
  SHA512:
6
- metadata.gz: dfd9c333b94181496134b825c63d6e93a0390f81d426526f79c00cf12556021b60004b29b57ca9b0b274141937027f7bc780552a60de007e5f790b19910354c0
7
- data.tar.gz: 205beeb8829c8358fd29c0a18351522e566106e24220af3d7bec3676694d37d682b92243e4fd4cd495b542f9945a28cf8585e587342672d31779d0b21b53ae4e
6
+ metadata.gz: bc3c92ae7b5268f02b232f04d28a2b6491740a618c9a9f3d78a099d51ed03dee489ebb805485c82552fc38d8164a4eb75a8e19a6d1df53aaac536e99287009fb
7
+ data.tar.gz: 1bb1594b9cd5831455e1a7e2454932b57fdd7da4401bc2596146dc1669fa6783931df3042c9deed6a3cf032c59d8697500a470a8aeba859061868cd8ab8b4ab8
@@ -6,6 +6,7 @@ module Document::Corpus
6
6
  corpus = Persist.open_tokyocabinet(corpus, true, :single, "BDB") if String === corpus
7
7
  corpus.extend Document::Corpus unless Document::Corpus === corpus
8
8
  corpus.extend Persist::TSVAdapter unless Persist::TSVAdapter === corpus
9
+ corpus.close
9
10
  corpus
10
11
  end
11
12
 
@@ -1,18 +1,26 @@
1
1
  require 'rbbt/sources/pubmed'
2
2
 
3
3
  module Document::Corpus
4
- def add_pmid(pmid, type = nil)
4
+ PUBMED_NAMESPACE="PMID"
5
+ def add_pmid(pmid, type = nil, update = false)
6
+ type = :abstract if type.nil?
7
+ if update == false
8
+ id = [PUBMED_NAMESPACE, pmid, type].collect{|e| e.to_s}*":"
9
+ documents = self.documents(id)
10
+ return documents if documents.any?
11
+ end
12
+
5
13
  pmids = Array === pmid ? pmid : [pmid]
6
14
  type = nil if String === type and type.empty?
7
15
 
8
16
  res = PubMed.get_article(pmids).collect do |pmid, article|
9
- document = if type.nil? || type.to_sym == :abstract
10
- Document.setup(article.abstract || "", "PMID", pmid, :abstract, self, :corpus => self)
17
+ document = if type.to_sym == :abstract
18
+ Document.setup(article.abstract || "", PUBMED_NAMESPACE, pmid, :abstract, self, :corpus => self)
11
19
  elsif type.to_sym == :title
12
- Document.setup(article.title, :PMID, pmid, :title, self)
20
+ Document.setup(article.title, PUBMED_NAMESPACE, pmid, :title, self)
13
21
  else
14
22
  raise "No FullText available for #{ pmid }" if article.full_text.nil?
15
- Document.setup(article.full_text, :PMID, pmid, :fulltext, self, :corpus => self)
23
+ Document.setup(article.full_text, PUBMED_NAMESPACE, pmid, :fulltext, self, :corpus => self)
16
24
  end
17
25
  Log.debug "Loading pmid #{pmid}"
18
26
  add_document(document)
@@ -7,7 +7,7 @@ module OpenNLP
7
7
  Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find
8
8
 
9
9
 
10
- Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"
10
+ Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"
11
11
 
12
12
  MAX = 5
13
13
 
@@ -1,12 +1,6 @@
1
1
  #!/bin/bash
2
2
 
3
3
  name='OpenNLP'
4
- url="http://apache.rediris.es/opennlp/opennlp-1.9.2/apache-opennlp-1.9.2-bin.tar.gz"
4
+ url="http://apache.rediris.es/opennlp/opennlp-1.9.3/apache-opennlp-1.9.3-bin.tar.gz"
5
5
 
6
- get_src "$name" "$url"
7
- move_opt "$name"
8
-
9
-
10
- ln -sf "$OPT_DIR/$name/lib/"*.jar "$OPT_JAR_DIR/"
11
-
12
- clean_build
6
+ install_jar $name $url
@@ -8,7 +8,6 @@ class TestCorpusPubmed < Test::Unit::TestCase
8
8
  corpus = Document::Corpus.setup({})
9
9
 
10
10
  document = corpus.add_pmid("33359141", :abstract).first
11
- iii document.docid
12
11
  title = document.to(:title)
13
12
  assert title.include?("COVID-19")
14
13
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.5
4
+ version: 1.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-17 00:00:00.000000000 Z
11
+ date: 2021-06-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util