rbbt-sources 3.1.41 → 3.1.47
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/biomart/missing_in_archive +4 -0
- data/lib/rbbt/sources/biomart.rb +5 -0
- data/lib/rbbt/sources/entrez.rb +3 -3
- data/lib/rbbt/sources/go.rb +1 -1
- data/lib/rbbt/sources/pina.rb +1 -1
- data/lib/rbbt/sources/pubmed.rb +64 -52
- data/lib/rbbt/sources/tfacts.rb +14 -14
- data/share/install/Organism/Hsa/Rakefile +1 -1
- data/share/install/Organism/Mmu/Rakefile +1 -1
- data/share/install/Organism/Rno/Rakefile +1 -1
- data/share/install/Organism/organism_helpers.rb +2 -2
- data/test/rbbt/sources/test_entrez.rb +3 -0
- data/test/rbbt/sources/test_pubmed.rb +6 -0
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6cecde427390fc444e5ef5b6bef42072bf87714b404647c6c086da92a1eaa13a
|
4
|
+
data.tar.gz: 673429c8eab433ec9854c81d52450f8885c57e4b3f8b6779ded44585e6f88343
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e39a3a6418bfb514ec786da0128f6fb8437da9728224dacd41b837300680c6d34e821f08954820cb5b7fd064546fee21fd0557d179c469eda969485ffd5cd965
|
7
|
+
data.tar.gz: 9f332f35fcd9c66db7506875efcc7c5276b1f588dd3b70ca299aaa2a7f25c33a650e496ba207af4e12e3bb2a6dfbc9ec8b1f0729e3acedc354145572dd268836
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -134,11 +134,14 @@ module BioMart
|
|
134
134
|
# cause an error if the BioMart WS does not allow filtering with that
|
135
135
|
# attribute.
|
136
136
|
def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
137
|
+
IndiferentHash.setup(open_options)
|
137
138
|
open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
|
138
139
|
filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
|
139
140
|
attrs ||= []
|
140
141
|
open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
|
141
142
|
|
143
|
+
IndiferentHash.setup(open_options)
|
144
|
+
|
142
145
|
Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
|
143
146
|
|
144
147
|
max_items = 2
|
@@ -167,6 +170,7 @@ module BioMart
|
|
167
170
|
end
|
168
171
|
|
169
172
|
open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
|
173
|
+
|
170
174
|
if filename.nil?
|
171
175
|
results = TSV.open data, open_options
|
172
176
|
results.key_field = main
|
@@ -200,6 +204,7 @@ module BioMart
|
|
200
204
|
missing+=v if Organism.compare_archives(current_archive, t) == -1
|
201
205
|
elsif k=~ /^>(.*)/
|
202
206
|
t = $1.strip
|
207
|
+
iii [current_archive, t, Organism.compare_archives(current_archive, t)]
|
203
208
|
missing+=v if Organism.compare_archives(current_archive, t) == 1
|
204
209
|
end
|
205
210
|
end
|
data/lib/rbbt/sources/entrez.rb
CHANGED
@@ -14,7 +14,7 @@ module Entrez
|
|
14
14
|
options = Misc.add_defaults options, :key_field => 1, :fields => [5], :persist => true, :merge => true
|
15
15
|
|
16
16
|
taxs = [taxs] unless Array === taxs
|
17
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
17
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
18
18
|
|
19
19
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
20
20
|
tsv.key_field = "Entrez Gene ID"
|
@@ -26,7 +26,7 @@ module Entrez
|
|
26
26
|
options = Misc.add_defaults options, :key_field => 1, :fields => [2], :persist => true, :merge => true
|
27
27
|
|
28
28
|
taxs = [taxs] unless Array === taxs
|
29
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
29
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
30
30
|
|
31
31
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
32
32
|
tsv.key_field = "Entrez Gene ID"
|
@@ -39,7 +39,7 @@ module Entrez
|
|
39
39
|
options = {:key_field => 1, :fields => [2], :persist => true, :merge => true}
|
40
40
|
|
41
41
|
taxs = [taxs] unless taxs.is_a?(Array)
|
42
|
-
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}
|
42
|
+
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
43
43
|
|
44
44
|
Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
|
45
45
|
end
|
data/lib/rbbt/sources/go.rb
CHANGED
@@ -27,7 +27,7 @@ module GO
|
|
27
27
|
def self.init
|
28
28
|
Persist.persist_tsv(nil, 'gene_ontology', {}, :persist => true) do |info|
|
29
29
|
info.serializer = :marshal if info.respond_to? :serializer
|
30
|
-
Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
|
30
|
+
Rbbt.share.databases.GO.gene_ontology.produce.read.split(/\[Term\]/).each{|term|
|
31
31
|
term_info = {}
|
32
32
|
|
33
33
|
term.split(/\n/). select{|l| l =~ /:/}.each{|l|
|
data/lib/rbbt/sources/pina.rb
CHANGED
@@ -7,7 +7,7 @@ module Pina
|
|
7
7
|
Pina.claim Pina.protein_protein, :proc do
|
8
8
|
require 'rbbt/sources/organism'
|
9
9
|
|
10
|
-
url = "
|
10
|
+
url = "https://omics.bjcancer.org/pina/download/Homo%20sapiens-20140521.tsv"
|
11
11
|
|
12
12
|
dumper = TSV::Dumper.new :type => :double,
|
13
13
|
:key_field => 'UniProt/SwissProt Accession', :namespace => Organism.default_code("Hsa"),
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -109,6 +109,14 @@ module PubMed
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
+
def pmc_full_xml
|
113
|
+
begin
|
114
|
+
Open.read("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=#{pmid}")
|
115
|
+
rescue
|
116
|
+
nil
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
112
120
|
def pdf_url
|
113
121
|
return pmc_pdf if pmc_pdf
|
114
122
|
@gscholar_pdf ||= begin
|
@@ -121,18 +129,22 @@ module PubMed
|
|
121
129
|
end
|
122
130
|
|
123
131
|
def full_text
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
132
|
+
text = if pdf_url
|
133
|
+
text = nil
|
134
|
+
TmpFile.with_file do |pdf|
|
135
|
+
# Change user-agent, oh well...
|
136
|
+
`wget --user-agent=firefox #{ pdf_url } -O #{ pdf } -t 3`
|
137
|
+
TmpFile.with_file do |txt|
|
138
|
+
`pdftotext #{ pdf } #{ txt }`
|
139
|
+
text = Open.read(txt) if File.exists? txt
|
140
|
+
end
|
141
|
+
end
|
142
|
+
text
|
143
|
+
elsif pmc_full_xml
|
144
|
+
pmc_full_xml
|
145
|
+
else
|
146
|
+
nil
|
147
|
+
end
|
136
148
|
|
137
149
|
Misc.fixutf8(text)
|
138
150
|
end
|
@@ -177,46 +189,46 @@ module PubMed
|
|
177
189
|
# ID specified as an argument. If +pmid+ is an array instead of a single
|
178
190
|
# identifier it returns an hash with the Article object for each id.
|
179
191
|
# It uses the Rbbt cache to save the articles xml.
|
180
|
-
def self.get_article(pmid)
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
end
|
192
|
+
#def self.get_article(pmid)
|
193
|
+
|
194
|
+
# if pmid.is_a? Array
|
195
|
+
# missing = []
|
196
|
+
# list = {}
|
197
|
+
|
198
|
+
# pmid.each{|p|
|
199
|
+
# filename = p.to_s + '.xml'
|
200
|
+
# if File.exists? FileCache.path(filename)
|
201
|
+
# list[p] = Article.new(Open.read(FileCache.path(filename)))
|
202
|
+
# else
|
203
|
+
# missing << p
|
204
|
+
# end
|
205
|
+
# }
|
206
|
+
|
207
|
+
# return list unless missing.any?
|
208
|
+
|
209
|
+
# articles = get_online(missing)
|
210
|
+
|
211
|
+
# articles.each{|p, xml|
|
212
|
+
# filename = p + '.xml'
|
213
|
+
# FileCache.add(filename,xml)
|
214
|
+
# list[p] = Article.new(xml)
|
215
|
+
# }
|
216
|
+
|
217
|
+
# return list
|
218
|
+
|
219
|
+
# else
|
220
|
+
# filename = pmid.to_s + '.xml'
|
221
|
+
|
222
|
+
# if File.exists? FileCache.path(filename)
|
223
|
+
# return Article.new(Open.read(FileCache.path(filename)))
|
224
|
+
# else
|
225
|
+
# xml = get_online(pmid)
|
226
|
+
# FileCache.add(filename,xml)
|
227
|
+
|
228
|
+
# return Article.new(xml)
|
229
|
+
# end
|
230
|
+
# end
|
231
|
+
#end
|
220
232
|
|
221
233
|
def self.get_article(pmids)
|
222
234
|
_array = Array === pmids
|
data/lib/rbbt/sources/tfacts.rb
CHANGED
@@ -2,15 +2,15 @@ require 'rbbt'
|
|
2
2
|
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/resource'
|
4
4
|
|
5
|
-
module TFacts
|
5
|
+
module TFactS
|
6
6
|
extend Resource
|
7
|
-
self.subdir = "share/databases/
|
7
|
+
self.subdir = "share/databases/TFactS"
|
8
8
|
|
9
|
-
|
9
|
+
TFactS.claim TFactS[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
|
10
10
|
|
11
|
-
|
11
|
+
TFactS.claim TFactS.targets, :proc do
|
12
12
|
require 'spreadsheet'
|
13
|
-
book = Spreadsheet.open
|
13
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
14
14
|
sheet = book.worksheet 0
|
15
15
|
|
16
16
|
tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)"], :namespace => "Hsa", :type => :flat)
|
@@ -24,9 +24,9 @@ module TFacts
|
|
24
24
|
tsv.to_s
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
TFactS.claim TFactS.targets_signed, :proc do
|
28
28
|
require 'spreadsheet'
|
29
|
-
book = Spreadsheet.open
|
29
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
30
30
|
sheet = book.worksheet 1
|
31
31
|
|
32
32
|
tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)", "Sign", "PMID"], :namespace => "Hsa", :type => :double)
|
@@ -43,13 +43,13 @@ module TFacts
|
|
43
43
|
tsv.to_s
|
44
44
|
end
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
TFactS.claim TFactS.regulators, :proc do
|
47
|
+
TFactS.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
|
48
48
|
end
|
49
49
|
|
50
|
-
|
50
|
+
TFactS.claim TFactS.tf_tg, :proc do
|
51
51
|
require 'spreadsheet'
|
52
|
-
book = Spreadsheet.open
|
52
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
53
53
|
|
54
54
|
tsv = TSV.setup({}, :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)", "Sign", "Species", "Source", "PMID"], :namespace => "Hsa", :type => :double)
|
55
55
|
|
@@ -128,16 +128,16 @@ if defined? Entity and defined? Gene and Entity === Gene
|
|
128
128
|
|
129
129
|
module Gene
|
130
130
|
property :is_transcription_factor? => :array2single do
|
131
|
-
tfs =
|
131
|
+
tfs = TFactS.targets.keys
|
132
132
|
self.name.collect{|gene| tfs.include? gene}
|
133
133
|
end
|
134
134
|
|
135
135
|
property :transcription_regulators => :array2single do
|
136
|
-
Gene.setup(
|
136
|
+
Gene.setup(TFactS.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
|
137
137
|
end
|
138
138
|
|
139
139
|
property :transcription_targets => :array2single do
|
140
|
-
Gene.setup(
|
140
|
+
Gene.setup(TFactS.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
|
141
141
|
end
|
142
142
|
end
|
143
143
|
end
|
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [9606]
|
7
7
|
$scientific_name = "Homo sapiens"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "hsapiens_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'hsapiens_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'hsapiens_snp'
|
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [10090]
|
7
7
|
$scientific_name = "Mus musculus"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "mmusculus_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'mmusculus_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'mmusculus_snp'
|
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
|
|
9
9
|
$biomart_db = 'rnorvegicus_gene_ensembl'
|
10
10
|
$biomart_db_germline_variation = 'rnorvegicus_snp'
|
11
11
|
$biomart_db_somatic_variation = 'rnorvegicus_snp_som'
|
12
|
-
$ortholog_key = "
|
12
|
+
$ortholog_key = "rnorvegicus_homolog_ensembl_gene"
|
13
13
|
|
14
14
|
$biomart_lexicon = [
|
15
15
|
[ 'Associated Gene Name' , "external_gene_id"],
|
@@ -547,13 +547,13 @@ end
|
|
547
547
|
rule /^possible_ortholog_(.*)/ do |t|
|
548
548
|
other = t.name.match(/ortholog_(.*)/)[1]
|
549
549
|
other_key = Organism.ortholog_key(other).produce.read
|
550
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :
|
550
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
551
551
|
end
|
552
552
|
|
553
553
|
rule /^ortholog_(.*)/ do |t|
|
554
554
|
other = t.name.match(/ortholog_(.*)/)[1]
|
555
555
|
other_key = Organism.ortholog_key(other).produce.read
|
556
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :
|
556
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
557
557
|
end
|
558
558
|
|
559
559
|
rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
@@ -17,6 +17,12 @@ class TestPubMed < Test::Unit::TestCase
|
|
17
17
|
pmid = '16438716'
|
18
18
|
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
19
19
|
end
|
20
|
+
|
21
|
+
def test_pmc_full_xml
|
22
|
+
pmid = '4304705'
|
23
|
+
assert PubMed.get_article(pmid).pmc_full_xml.include?("HBV antigen")
|
24
|
+
end
|
25
|
+
|
20
26
|
|
21
27
|
def test_query
|
22
28
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.41
|
4
|
+
version: 3.1.47
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rbbt-text
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: mechanize
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|