rbbt-text 1.3.5 → 1.3.6
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 110d05d205305e48b17ca63a91f8fef66ba6cb0d24639031614521c463dfde8f
|
4
|
+
data.tar.gz: 97df4097eb1a0cb645f4f6a0eeec9138f296760e09fcdc85a5ebebe6996640d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc3c92ae7b5268f02b232f04d28a2b6491740a618c9a9f3d78a099d51ed03dee489ebb805485c82552fc38d8164a4eb75a8e19a6d1df53aaac536e99287009fb
|
7
|
+
data.tar.gz: 1bb1594b9cd5831455e1a7e2454932b57fdd7da4401bc2596146dc1669fa6783931df3042c9deed6a3cf032c59d8697500a470a8aeba859061868cd8ab8b4ab8
|
data/lib/rbbt/document/corpus.rb
CHANGED
@@ -6,6 +6,7 @@ module Document::Corpus
|
|
6
6
|
corpus = Persist.open_tokyocabinet(corpus, true, :single, "BDB") if String === corpus
|
7
7
|
corpus.extend Document::Corpus unless Document::Corpus === corpus
|
8
8
|
corpus.extend Persist::TSVAdapter unless Persist::TSVAdapter === corpus
|
9
|
+
corpus.close
|
9
10
|
corpus
|
10
11
|
end
|
11
12
|
|
@@ -1,18 +1,26 @@
|
|
1
1
|
require 'rbbt/sources/pubmed'
|
2
2
|
|
3
3
|
module Document::Corpus
|
4
|
-
|
4
|
+
PUBMED_NAMESPACE="PMID"
|
5
|
+
def add_pmid(pmid, type = nil, update = false)
|
6
|
+
type = :abstract if type.nil?
|
7
|
+
if update == false
|
8
|
+
id = [PUBMED_NAMESPACE, pmid, type].collect{|e| e.to_s}*":"
|
9
|
+
documents = self.documents(id)
|
10
|
+
return documents if documents.any?
|
11
|
+
end
|
12
|
+
|
5
13
|
pmids = Array === pmid ? pmid : [pmid]
|
6
14
|
type = nil if String === type and type.empty?
|
7
15
|
|
8
16
|
res = PubMed.get_article(pmids).collect do |pmid, article|
|
9
|
-
document = if type.
|
10
|
-
Document.setup(article.abstract || "",
|
17
|
+
document = if type.to_sym == :abstract
|
18
|
+
Document.setup(article.abstract || "", PUBMED_NAMESPACE, pmid, :abstract, self, :corpus => self)
|
11
19
|
elsif type.to_sym == :title
|
12
|
-
Document.setup(article.title,
|
20
|
+
Document.setup(article.title, PUBMED_NAMESPACE, pmid, :title, self)
|
13
21
|
else
|
14
22
|
raise "No FullText available for #{ pmid }" if article.full_text.nil?
|
15
|
-
Document.setup(article.full_text,
|
23
|
+
Document.setup(article.full_text, PUBMED_NAMESPACE, pmid, :fulltext, self, :corpus => self)
|
16
24
|
end
|
17
25
|
Log.debug "Loading pmid #{pmid}"
|
18
26
|
add_document(document)
|
@@ -7,7 +7,7 @@ module OpenNLP
|
|
7
7
|
Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find
|
8
8
|
|
9
9
|
|
10
|
-
Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "
|
10
|
+
Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"
|
11
11
|
|
12
12
|
MAX = 5
|
13
13
|
|
@@ -1,12 +1,6 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
|
3
3
|
name='OpenNLP'
|
4
|
-
url="http://apache.rediris.es/opennlp/opennlp-1.9.
|
4
|
+
url="http://apache.rediris.es/opennlp/opennlp-1.9.3/apache-opennlp-1.9.3-bin.tar.gz"
|
5
5
|
|
6
|
-
|
7
|
-
move_opt "$name"
|
8
|
-
|
9
|
-
|
10
|
-
ln -sf "$OPT_DIR/$name/lib/"*.jar "$OPT_JAR_DIR/"
|
11
|
-
|
12
|
-
clean_build
|
6
|
+
install_jar $name $url
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-06-
|
11
|
+
date: 2021-06-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|