rbbt-sources 3.1.43 → 3.1.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/entrez.rb +3 -3
- data/lib/rbbt/sources/go.rb +1 -1
- data/lib/rbbt/sources/pubmed.rb +24 -12
- data/test/rbbt/sources/test_entrez.rb +3 -0
- data/test/rbbt/sources/test_pubmed.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ecb6a979911fd4e4a4988ab9abee186cb181eab335b41505d71e3405f1cac454
|
4
|
+
data.tar.gz: 5b6ba6e080cff6d6bd864dc468f418b1d74d9ee6fe916df0b9edc2a8ba2e5471
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72cc59fa3439ffbb4b850723c4b6c5403f65301a70ddf61f6672e52646ee1e90644ac125234cff1a51702a100d7ea2731940cfb33a49d0c3531b5fd8877d1d24
|
7
|
+
data.tar.gz: fdf336019c660cf2fb0eea5af696edae0dbb3d31adf7c063060626c44d2ecf6864f1c0dd614479a1166583ad575cb288ce50e4291f90781f26d06568d296c3e7
|
data/lib/rbbt/sources/entrez.rb
CHANGED
@@ -14,7 +14,7 @@ module Entrez
|
|
14
14
|
options = Misc.add_defaults options, :key_field => 1, :fields => [5], :persist => true, :merge => true
|
15
15
|
|
16
16
|
taxs = [taxs] unless Array === taxs
|
17
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
17
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
18
18
|
|
19
19
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
20
20
|
tsv.key_field = "Entrez Gene ID"
|
@@ -26,7 +26,7 @@ module Entrez
|
|
26
26
|
options = Misc.add_defaults options, :key_field => 1, :fields => [2], :persist => true, :merge => true
|
27
27
|
|
28
28
|
taxs = [taxs] unless Array === taxs
|
29
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
29
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
30
30
|
|
31
31
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
32
32
|
tsv.key_field = "Entrez Gene ID"
|
@@ -39,7 +39,7 @@ module Entrez
|
|
39
39
|
options = {:key_field => 1, :fields => [2], :persist => true, :merge => true}
|
40
40
|
|
41
41
|
taxs = [taxs] unless taxs.is_a?(Array)
|
42
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
42
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
43
43
|
|
44
44
|
Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
|
45
45
|
end
|
data/lib/rbbt/sources/go.rb
CHANGED
@@ -27,7 +27,7 @@ module GO
|
|
27
27
|
def self.init
|
28
28
|
Persist.persist_tsv(nil, 'gene_ontology', {}, :persist => true) do |info|
|
29
29
|
info.serializer = :marshal if info.respond_to? :serializer
|
30
|
-
Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
|
30
|
+
Rbbt.share.databases.GO.gene_ontology.produce.read.split(/\[Term\]/).each{|term|
|
31
31
|
term_info = {}
|
32
32
|
|
33
33
|
term.split(/\n/). select{|l| l =~ /:/}.each{|l|
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -109,6 +109,14 @@ module PubMed
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
+
def pmc_full_xml
|
113
|
+
begin
|
114
|
+
Open.read("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=#{pmid}")
|
115
|
+
rescue
|
116
|
+
nil
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
112
120
|
def pdf_url
|
113
121
|
return pmc_pdf if pmc_pdf
|
114
122
|
@gscholar_pdf ||= begin
|
@@ -121,18 +129,22 @@ module PubMed
|
|
121
129
|
end
|
122
130
|
|
123
131
|
def full_text
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
132
|
+
text = if pdf_url
|
133
|
+
text = nil
|
134
|
+
TmpFile.with_file do |pdf|
|
135
|
+
# Change user-agent, oh well...
|
136
|
+
`wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
|
137
|
+
TmpFile.with_file do |txt|
|
138
|
+
`pdftotext #{ pdf } #{ txt }`
|
139
|
+
text = Open.read(txt) if File.exists? txt
|
140
|
+
end
|
141
|
+
end
|
142
|
+
text
|
143
|
+
elsif pmc_full_xml
|
144
|
+
pmc_full_xml
|
145
|
+
else
|
146
|
+
nil
|
147
|
+
end
|
136
148
|
|
137
149
|
Misc.fixutf8(text)
|
138
150
|
end
|
data/test/rbbt/sources/test_pubmed.rb
CHANGED
@@ -17,6 +17,12 @@ class TestPubMed < Test::Unit::TestCase
|
|
17
17
|
pmid = '16438716'
|
18
18
|
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
19
19
|
end
|
20
|
+
|
21
|
+
def test_pmc_full_xml
|
22
|
+
pmid = '4304705'
|
23
|
+
assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
|
24
|
+
end
|
25
|
+
|
20
26
|
|
21
27
|
def test_query
|
22
28
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.43
|
4
|
+
version: 3.1.45
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|