rbbt-sources 3.1.46 → 3.1.51
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/biomart/missing_in_archive +4 -0
- data/lib/rbbt/sources/biomart.rb +4 -0
- data/lib/rbbt/sources/ensembl_ftp.rb +19 -7
- data/lib/rbbt/sources/organism.rb +8 -4
- data/lib/rbbt/sources/uniprot.rb +7 -0
- data/share/Ensembl/release_dates +5 -0
- data/share/install/Organism/Hsa/Rakefile +1 -1
- data/share/install/Organism/Mmu/Rakefile +1 -1
- data/share/install/Organism/Rno/Rakefile +2 -1
- data/share/install/Organism/organism_helpers.rb +8 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cb92f1a2c300c6787870f06f5b4ea45967242a33e7c51cf94f002d1901000af
|
4
|
+
data.tar.gz: 793c56312f02532861451142988b24463de6a30d460f6f7a7238889b91c9c336
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95ab052b23bc28f919e9cf242e0a74e1e433335879d77fc38752925f74e5ec92b8b42b1c27f1fee170bb3125be1a04b83b24c7da524c46efce07241ff169bdff
|
7
|
+
data.tar.gz: bbe1970028942398b1b84e314d704acb0fac405045785e45516e8dbd65927852874bcceb0393748fd68a507c6d4292fda579d1df16d7831d47b5b9ea0de6bf7d
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -134,11 +134,14 @@ module BioMart
|
|
134
134
|
# cause an error if the BioMart WS does not allow filtering with that
|
135
135
|
# attribute.
|
136
136
|
def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
137
|
+
IndiferentHash.setup(open_options)
|
137
138
|
open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
|
138
139
|
filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
|
139
140
|
attrs ||= []
|
140
141
|
open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
|
141
142
|
|
143
|
+
IndiferentHash.setup(open_options)
|
144
|
+
|
142
145
|
Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
|
143
146
|
|
144
147
|
max_items = 2
|
@@ -167,6 +170,7 @@ module BioMart
|
|
167
170
|
end
|
168
171
|
|
169
172
|
open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
|
173
|
+
|
170
174
|
if filename.nil?
|
171
175
|
results = TSV.open data, open_options
|
172
176
|
results.key_field = main
|
@@ -52,24 +52,35 @@ module Ensembl
|
|
52
52
|
File.join("ftp://" + SERVER, ftp_directory_for(organism) )
|
53
53
|
end
|
54
54
|
|
55
|
-
def self.url_for(organism, table)
|
56
|
-
|
55
|
+
def self.url_for(organism, table, extension)
|
56
|
+
File.join(base_url(organism), table) + ".#{extension}.gz"
|
57
|
+
end
|
58
|
+
|
59
|
+
def self._get_gz(url)
|
60
|
+
begin
|
61
|
+
CMD.cmd("wget '#{url}' -O - | gunzip").read
|
62
|
+
rescue
|
63
|
+
CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def self._get_file(organism, table, extension)
|
68
|
+
url = url_for(organism, table, extension)
|
69
|
+
self._get_gz(url)
|
57
70
|
end
|
58
71
|
|
59
72
|
def self.has_table?(organism, table)
|
60
|
-
sql_file =
|
73
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
61
74
|
! sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm).nil?
|
62
75
|
end
|
63
76
|
|
64
77
|
def self.fields_for(organism, table)
|
65
|
-
sql_file =
|
66
|
-
|
78
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
67
79
|
chunk = sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm)[1]
|
68
80
|
chunk.scan(/^\s+`(.*?)`/).flatten
|
69
81
|
end
|
70
82
|
|
71
83
|
def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
|
72
|
-
url = url_for(organism, table)
|
73
84
|
if key_field and fields
|
74
85
|
all_fields = fields_for(organism, table)
|
75
86
|
key_pos = all_fields.index key_field
|
@@ -78,7 +89,8 @@ module Ensembl
|
|
78
89
|
options[:key_field] = key_pos
|
79
90
|
options[:fields] = field_pos
|
80
91
|
end
|
81
|
-
|
92
|
+
|
93
|
+
tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
|
82
94
|
tsv.key_field = key_field
|
83
95
|
tsv.fields = fields
|
84
96
|
tsv
|
@@ -88,8 +88,10 @@ module Organism
|
|
88
88
|
end
|
89
89
|
when "Mmu"
|
90
90
|
"mm10"
|
91
|
+
when "Rno"
|
92
|
+
"rn6"
|
91
93
|
else
|
92
|
-
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
94
|
+
raise "Only organism 'Hsa' (Homo sapiens), 'Rno' (Rattus norvegicus), and Mmu (Mus musculus) supported"
|
93
95
|
end
|
94
96
|
end
|
95
97
|
|
@@ -117,6 +119,8 @@ module Organism
|
|
117
119
|
end
|
118
120
|
when "Mmu"
|
119
121
|
"GRCm38"
|
122
|
+
when "Rno"
|
123
|
+
"Rnor_6.0"
|
120
124
|
else
|
121
125
|
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
122
126
|
end
|
@@ -145,12 +149,12 @@ module Organism
|
|
145
149
|
new_positions = {}
|
146
150
|
|
147
151
|
TmpFile.with_file(positions_bed) do |source_bed|
|
148
|
-
TmpFile.with_file
|
149
|
-
TmpFile.with_file
|
152
|
+
TmpFile.with_file do |unmapped_file|
|
153
|
+
TmpFile.with_file do |map_file|
|
150
154
|
|
151
155
|
|
152
156
|
Open.write(map_file, Open.read(map_url))
|
153
|
-
new_mutations = TmpFile.with_file
|
157
|
+
new_mutations = TmpFile.with_file do |target_bed|
|
154
158
|
FileUtils.chmod(755, Rbbt.software.opt.bin.liftOver.produce.find)
|
155
159
|
CMD.cmd("#{Rbbt.software.opt.bin.liftOver.find} '#{source_bed}' '#{map_file}' '#{target_bed}' '#{unmapped_file}'").read
|
156
160
|
Open.read(target_bed) do |line|
|
data/lib/rbbt/sources/uniprot.rb
CHANGED
@@ -203,12 +203,19 @@ module UniProt
|
|
203
203
|
end
|
204
204
|
value = part.gsub("\nFT", '').gsub(/\s+/, ' ')
|
205
205
|
case
|
206
|
+
when value.match(/(\d+)\.\.(\d+) (.*)/)
|
207
|
+
start, eend, description = $1, $2, $3
|
208
|
+
description.gsub(/^FT\s+/m, '')
|
206
209
|
when value.match(/(\d+) (\d+) (.*)/)
|
207
210
|
start, eend, description = $1, $2, $3
|
208
211
|
description.gsub(/^FT\s+/m, '')
|
209
212
|
when value.match(/^\s+(\d+) (\d+)/)
|
210
213
|
start, eend = $1, $2
|
211
214
|
description = nil
|
215
|
+
when value.match(/(\d+) (.*)/)
|
216
|
+
start, description = $1, $2
|
217
|
+
eend = start
|
218
|
+
description.gsub(/^FT\s+/m, '')
|
212
219
|
else
|
213
220
|
Log.debug "Value not understood: #{ value }"
|
214
221
|
end
|
data/share/Ensembl/release_dates
CHANGED
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [9606]
|
7
7
|
$scientific_name = "Homo sapiens"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "hsapiens_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'hsapiens_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'hsapiens_snp'
|
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [10090]
|
7
7
|
$scientific_name = "Mus musculus"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "mmusculus_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'mmusculus_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'mmusculus_snp'
|
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
|
|
9
9
|
$biomart_db = 'rnorvegicus_gene_ensembl'
|
10
10
|
$biomart_db_germline_variation = 'rnorvegicus_snp'
|
11
11
|
$biomart_db_somatic_variation = 'rnorvegicus_snp_som'
|
12
|
-
$ortholog_key = "
|
12
|
+
$ortholog_key = "rnorvegicus_homolog_ensembl_gene"
|
13
13
|
|
14
14
|
$biomart_lexicon = [
|
15
15
|
[ 'Associated Gene Name' , "external_gene_id"],
|
@@ -20,6 +20,7 @@ $biomart_lexicon = [
|
|
20
20
|
|
21
21
|
$biomart_identifiers = [
|
22
22
|
['Entrez Gene ID', "entrezgene"],
|
23
|
+
['Ensembl Protein ID', "ensembl_peptide_id" ],
|
23
24
|
['Associated Gene Name' , "rgd_symbol"],
|
24
25
|
['Protein ID' , "protein_id"] ,
|
25
26
|
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
@@ -547,13 +547,13 @@ end
|
|
547
547
|
rule /^possible_ortholog_(.*)/ do |t|
|
548
548
|
other = t.name.match(/ortholog_(.*)/)[1]
|
549
549
|
other_key = Organism.ortholog_key(other).produce.read
|
550
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :
|
550
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
551
551
|
end
|
552
552
|
|
553
553
|
rule /^ortholog_(.*)/ do |t|
|
554
554
|
other = t.name.match(/ortholog_(.*)/)[1]
|
555
555
|
other_key = Organism.ortholog_key(other).produce.read
|
556
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :
|
556
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
557
557
|
end
|
558
558
|
|
559
559
|
rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
@@ -728,13 +728,18 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
|
|
728
728
|
transcript_sequence.through do |transcript, sequence|
|
729
729
|
protein = transcript_protein[transcript]
|
730
730
|
next if protein.nil? or protein.empty?
|
731
|
+
|
731
732
|
utr5 = transcript_5utr[transcript]
|
732
733
|
utr3 = transcript_3utr[transcript]
|
733
734
|
phase = transcript_phase[transcript] || 0
|
735
|
+
|
734
736
|
if phase < 0
|
735
|
-
utr5
|
737
|
+
if utr5.nil? || utr5 == 0 || utr5 == "0"
|
738
|
+
utr5 = 0
|
739
|
+
end
|
736
740
|
phase = 0
|
737
741
|
end
|
742
|
+
|
738
743
|
psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
|
739
744
|
protein_sequence[protein]=psequence
|
740
745
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.51
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|