rbbt-sources 3.1.46 → 3.1.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/biomart/missing_in_archive +4 -0
- data/lib/rbbt/sources/biomart.rb +4 -0
- data/lib/rbbt/sources/ensembl_ftp.rb +19 -7
- data/lib/rbbt/sources/organism.rb +8 -4
- data/lib/rbbt/sources/uniprot.rb +7 -0
- data/share/Ensembl/release_dates +5 -0
- data/share/install/Organism/Hsa/Rakefile +1 -1
- data/share/install/Organism/Mmu/Rakefile +1 -1
- data/share/install/Organism/Rno/Rakefile +2 -1
- data/share/install/Organism/organism_helpers.rb +8 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cb92f1a2c300c6787870f06f5b4ea45967242a33e7c51cf94f002d1901000af
|
4
|
+
data.tar.gz: 793c56312f02532861451142988b24463de6a30d460f6f7a7238889b91c9c336
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95ab052b23bc28f919e9cf242e0a74e1e433335879d77fc38752925f74e5ec92b8b42b1c27f1fee170bb3125be1a04b83b24c7da524c46efce07241ff169bdff
|
7
|
+
data.tar.gz: bbe1970028942398b1b84e314d704acb0fac405045785e45516e8dbd65927852874bcceb0393748fd68a507c6d4292fda579d1df16d7831d47b5b9ea0de6bf7d
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -134,11 +134,14 @@ module BioMart
|
|
134
134
|
# cause an error if the BioMart WS does not allow filtering with that
|
135
135
|
# attribute.
|
136
136
|
def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
137
|
+
IndiferentHash.setup(open_options)
|
137
138
|
open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
|
138
139
|
filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
|
139
140
|
attrs ||= []
|
140
141
|
open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
|
141
142
|
|
143
|
+
IndiferentHash.setup(open_options)
|
144
|
+
|
142
145
|
Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
|
143
146
|
|
144
147
|
max_items = 2
|
@@ -167,6 +170,7 @@ module BioMart
|
|
167
170
|
end
|
168
171
|
|
169
172
|
open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
|
173
|
+
|
170
174
|
if filename.nil?
|
171
175
|
results = TSV.open data, open_options
|
172
176
|
results.key_field = main
|
data/lib/rbbt/sources/ensembl_ftp.rb
CHANGED
@@ -52,24 +52,35 @@ module Ensembl
|
|
52
52
|
File.join("ftp://" + SERVER, ftp_directory_for(organism) )
|
53
53
|
end
|
54
54
|
|
55
|
-
def self.url_for(organism, table)
|
56
|
-
|
55
|
+
def self.url_for(organism, table, extension)
|
56
|
+
File.join(base_url(organism), table) + ".#{extension}.gz"
|
57
|
+
end
|
58
|
+
|
59
|
+
def self._get_gz(url)
|
60
|
+
begin
|
61
|
+
CMD.cmd("wget '#{url}' -O - | gunzip").read
|
62
|
+
rescue
|
63
|
+
CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def self._get_file(organism, table, extension)
|
68
|
+
url = url_for(organism, table, extension)
|
69
|
+
self._get_gz(url)
|
57
70
|
end
|
58
71
|
|
59
72
|
def self.has_table?(organism, table)
|
60
|
-
sql_file =
|
73
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
61
74
|
! sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm).nil?
|
62
75
|
end
|
63
76
|
|
64
77
|
def self.fields_for(organism, table)
|
65
|
-
sql_file =
|
66
|
-
|
78
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
67
79
|
chunk = sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm)[1]
|
68
80
|
chunk.scan(/^\s+`(.*?)`/).flatten
|
69
81
|
end
|
70
82
|
|
71
83
|
def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
|
72
|
-
url = url_for(organism, table)
|
73
84
|
if key_field and fields
|
74
85
|
all_fields = fields_for(organism, table)
|
75
86
|
key_pos = all_fields.index key_field
|
@@ -78,7 +89,8 @@ module Ensembl
|
|
78
89
|
options[:key_field] = key_pos
|
79
90
|
options[:fields] = field_pos
|
80
91
|
end
|
81
|
-
|
92
|
+
|
93
|
+
tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
|
82
94
|
tsv.key_field = key_field
|
83
95
|
tsv.fields = fields
|
84
96
|
tsv
|
data/lib/rbbt/sources/organism.rb
CHANGED
@@ -88,8 +88,10 @@ module Organism
|
|
88
88
|
end
|
89
89
|
when "Mmu"
|
90
90
|
"mm10"
|
91
|
+
when "Rno"
|
92
|
+
"rn6"
|
91
93
|
else
|
92
|
-
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
94
|
+
raise "Only organism 'Hsa' (Homo sapiens), 'Rno' (Rattus norvegicus), and Mmu (Mus musculus) supported"
|
93
95
|
end
|
94
96
|
end
|
95
97
|
|
@@ -117,6 +119,8 @@ module Organism
|
|
117
119
|
end
|
118
120
|
when "Mmu"
|
119
121
|
"GRCm38"
|
122
|
+
when "Rno"
|
123
|
+
"Rnor_6.0"
|
120
124
|
else
|
121
125
|
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
122
126
|
end
|
@@ -145,12 +149,12 @@ module Organism
|
|
145
149
|
new_positions = {}
|
146
150
|
|
147
151
|
TmpFile.with_file(positions_bed) do |source_bed|
|
148
|
-
TmpFile.with_file
|
149
|
-
TmpFile.with_file
|
152
|
+
TmpFile.with_file do |unmapped_file|
|
153
|
+
TmpFile.with_file do |map_file|
|
150
154
|
|
151
155
|
|
152
156
|
Open.write(map_file, Open.read(map_url))
|
153
|
-
new_mutations = TmpFile.with_file
|
157
|
+
new_mutations = TmpFile.with_file do |target_bed|
|
154
158
|
FileUtils.chmod(755, Rbbt.software.opt.bin.liftOver.produce.find)
|
155
159
|
CMD.cmd("#{Rbbt.software.opt.bin.liftOver.find} '#{source_bed}' '#{map_file}' '#{target_bed}' '#{unmapped_file}'").read
|
156
160
|
Open.read(target_bed) do |line|
|
data/lib/rbbt/sources/uniprot.rb
CHANGED
@@ -203,12 +203,19 @@ module UniProt
|
|
203
203
|
end
|
204
204
|
value = part.gsub("\nFT", '').gsub(/\s+/, ' ')
|
205
205
|
case
|
206
|
+
when value.match(/(\d+)\.\.(\d+) (.*)/)
|
207
|
+
start, eend, description = $1, $2, $3
|
208
|
+
description.gsub(/^FT\s+/m, '')
|
206
209
|
when value.match(/(\d+) (\d+) (.*)/)
|
207
210
|
start, eend, description = $1, $2, $3
|
208
211
|
description.gsub(/^FT\s+/m, '')
|
209
212
|
when value.match(/^\s+(\d+) (\d+)/)
|
210
213
|
start, eend = $1, $2
|
211
214
|
description = nil
|
215
|
+
when value.match(/(\d+) (.*)/)
|
216
|
+
start, description = $1, $2
|
217
|
+
eend = start
|
218
|
+
description.gsub(/^FT\s+/m, '')
|
212
219
|
else
|
213
220
|
Log.debug "Value not understood: #{ value }"
|
214
221
|
end
|
data/share/Ensembl/release_dates
CHANGED
data/share/install/Organism/Hsa/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [9606]
|
7
7
|
$scientific_name = "Homo sapiens"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "hsapiens_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'hsapiens_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'hsapiens_snp'
|
data/share/install/Organism/Mmu/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [10090]
|
7
7
|
$scientific_name = "Mus musculus"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "mmusculus_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'mmusculus_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'mmusculus_snp'
|
data/share/install/Organism/Rno/Rakefile
CHANGED
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
|
|
9
9
|
$biomart_db = 'rnorvegicus_gene_ensembl'
|
10
10
|
$biomart_db_germline_variation = 'rnorvegicus_snp'
|
11
11
|
$biomart_db_somatic_variation = 'rnorvegicus_snp_som'
|
12
|
-
$ortholog_key = "
|
12
|
+
$ortholog_key = "rnorvegicus_homolog_ensembl_gene"
|
13
13
|
|
14
14
|
$biomart_lexicon = [
|
15
15
|
[ 'Associated Gene Name' , "external_gene_id"],
|
@@ -20,6 +20,7 @@ $biomart_lexicon = [
|
|
20
20
|
|
21
21
|
$biomart_identifiers = [
|
22
22
|
['Entrez Gene ID', "entrezgene"],
|
23
|
+
['Ensembl Protein ID', "ensembl_peptide_id" ],
|
23
24
|
['Associated Gene Name' , "rgd_symbol"],
|
24
25
|
['Protein ID' , "protein_id"] ,
|
25
26
|
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
data/share/install/Organism/organism_helpers.rb
CHANGED
@@ -547,13 +547,13 @@ end
|
|
547
547
|
rule /^possible_ortholog_(.*)/ do |t|
|
548
548
|
other = t.name.match(/ortholog_(.*)/)[1]
|
549
549
|
other_key = Organism.ortholog_key(other).produce.read
|
550
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :
|
550
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
551
551
|
end
|
552
552
|
|
553
553
|
rule /^ortholog_(.*)/ do |t|
|
554
554
|
other = t.name.match(/ortholog_(.*)/)[1]
|
555
555
|
other_key = Organism.ortholog_key(other).produce.read
|
556
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :
|
556
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
557
557
|
end
|
558
558
|
|
559
559
|
rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
@@ -728,13 +728,18 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
|
|
728
728
|
transcript_sequence.through do |transcript, sequence|
|
729
729
|
protein = transcript_protein[transcript]
|
730
730
|
next if protein.nil? or protein.empty?
|
731
|
+
|
731
732
|
utr5 = transcript_5utr[transcript]
|
732
733
|
utr3 = transcript_3utr[transcript]
|
733
734
|
phase = transcript_phase[transcript] || 0
|
735
|
+
|
734
736
|
if phase < 0
|
735
|
-
utr5
|
737
|
+
if utr5.nil? || utr5 == 0 || utr5 == "0"
|
738
|
+
utr5 = 0
|
739
|
+
end
|
736
740
|
phase = 0
|
737
741
|
end
|
742
|
+
|
738
743
|
psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
|
739
744
|
protein_sequence[protein]=psequence
|
740
745
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.46
|
4
|
+
version: 3.1.51
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|