rbbt-sources 3.1.43 → 3.1.45
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/entrez.rb +3 -3
- data/lib/rbbt/sources/go.rb +1 -1
- data/lib/rbbt/sources/pubmed.rb +24 -12
- data/test/rbbt/sources/test_entrez.rb +3 -0
- data/test/rbbt/sources/test_pubmed.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ecb6a979911fd4e4a4988ab9abee186cb181eab335b41505d71e3405f1cac454
|
4
|
+
data.tar.gz: 5b6ba6e080cff6d6bd864dc468f418b1d74d9ee6fe916df0b9edc2a8ba2e5471
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72cc59fa3439ffbb4b850723c4b6c5403f65301a70ddf61f6672e52646ee1e90644ac125234cff1a51702a100d7ea2731940cfb33a49d0c3531b5fd8877d1d24
|
7
|
+
data.tar.gz: fdf336019c660cf2fb0eea5af696edae0dbb3d31adf7c063060626c44d2ecf6864f1c0dd614479a1166583ad575cb288ce50e4291f90781f26d06568d296c3e7
|
data/lib/rbbt/sources/entrez.rb
CHANGED
@@ -14,7 +14,7 @@ module Entrez
|
|
14
14
|
options = Misc.add_defaults options, :key_field => 1, :fields => [5], :persist => true, :merge => true
|
15
15
|
|
16
16
|
taxs = [taxs] unless Array === taxs
|
17
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
17
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
18
18
|
|
19
19
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
20
20
|
tsv.key_field = "Entrez Gene ID"
|
@@ -26,7 +26,7 @@ module Entrez
|
|
26
26
|
options = Misc.add_defaults options, :key_field => 1, :fields => [2], :persist => true, :merge => true
|
27
27
|
|
28
28
|
taxs = [taxs] unless Array === taxs
|
29
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
29
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
30
30
|
|
31
31
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
32
32
|
tsv.key_field = "Entrez Gene ID"
|
@@ -39,7 +39,7 @@ module Entrez
|
|
39
39
|
options = {:key_field => 1, :fields => [2], :persist => true, :merge => true}
|
40
40
|
|
41
41
|
taxs = [taxs] unless taxs.is_a?(Array)
|
42
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
42
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
43
43
|
|
44
44
|
Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
|
45
45
|
end
|
data/lib/rbbt/sources/go.rb
CHANGED
@@ -27,7 +27,7 @@ module GO
|
|
27
27
|
def self.init
|
28
28
|
Persist.persist_tsv(nil, 'gene_ontology', {}, :persist => true) do |info|
|
29
29
|
info.serializer = :marshal if info.respond_to? :serializer
|
30
|
-
Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
|
30
|
+
Rbbt.share.databases.GO.gene_ontology.produce.read.split(/\[Term\]/).each{|term|
|
31
31
|
term_info = {}
|
32
32
|
|
33
33
|
term.split(/\n/). select{|l| l =~ /:/}.each{|l|
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -109,6 +109,14 @@ module PubMed
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
+
def pmc_full_xml
|
113
|
+
begin
|
114
|
+
Open.read("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=#{pmid}")
|
115
|
+
rescue
|
116
|
+
nil
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
112
120
|
def pdf_url
|
113
121
|
return pmc_pdf if pmc_pdf
|
114
122
|
@gscholar_pdf ||= begin
|
@@ -121,18 +129,22 @@ module PubMed
|
|
121
129
|
end
|
122
130
|
|
123
131
|
def full_text
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
132
|
+
text = if pdf_url
|
133
|
+
text = nil
|
134
|
+
TmpFile.with_file do |pdf|
|
135
|
+
# Change user-agent, oh well...
|
136
|
+
`wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
|
137
|
+
TmpFile.with_file do |txt|
|
138
|
+
`pdftotext #{ pdf } #{ txt }`
|
139
|
+
text = Open.read(txt) if File.exists? txt
|
140
|
+
end
|
141
|
+
end
|
142
|
+
text
|
143
|
+
elsif pmc_full_xml
|
144
|
+
pmc_full_xml
|
145
|
+
else
|
146
|
+
nil
|
147
|
+
end
|
136
148
|
|
137
149
|
Misc.fixutf8(text)
|
138
150
|
end
|
@@ -17,6 +17,12 @@ class TestPubMed < Test::Unit::TestCase
|
|
17
17
|
pmid = '16438716'
|
18
18
|
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
19
19
|
end
|
20
|
+
|
21
|
+
def test_pmc_full_xml
|
22
|
+
pmid = '4304705'
|
23
|
+
assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
|
24
|
+
end
|
25
|
+
|
20
26
|
|
21
27
|
def test_query
|
22
28
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.43
|
4
|
+
version: 3.1.45
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|