rbbt-sources 3.1.45 → 3.1.50
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/biomart/missing_in_archive +4 -0
- data/lib/rbbt/sources/biomart.rb +4 -0
- data/lib/rbbt/sources/ensembl_ftp.rb +19 -7
- data/lib/rbbt/sources/organism.rb +8 -4
- data/lib/rbbt/sources/tfacts.rb +14 -14
- data/share/Ensembl/release_dates +5 -0
- data/share/install/Organism/Hsa/Rakefile +1 -1
- data/share/install/Organism/Mmu/Rakefile +1 -1
- data/share/install/Organism/Rno/Rakefile +2 -1
- data/share/install/Organism/organism_helpers.rb +8 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76650eb77129f5a072416944c24e3155e507ffe75f79372c505d62b919899e48
|
4
|
+
data.tar.gz: 70bf96eb1fa599f121cf65f40388aae545c4a6bb03ca006bc24f4ec83c78175d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc1fc3dd34681ee50feaff3a447b98abf484eb5ed2775804e6aa2268e14dede03e16970ed7c5bfe5c5015b081514cb3309f178cbca973434848f83368ba27778
|
7
|
+
data.tar.gz: 1ce0587059951ee335cf08f3cf22ceb24c0b0deaaf9798689be141a53871934e1e679efbf2faf578fb10bf3088386539204cecc121a68becddedcdb0aad988c5
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -134,11 +134,14 @@ module BioMart
|
|
134
134
|
# cause an error if the BioMart WS does not allow filtering with that
|
135
135
|
# attribute.
|
136
136
|
def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
137
|
+
IndiferentHash.setup(open_options)
|
137
138
|
open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
|
138
139
|
filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
|
139
140
|
attrs ||= []
|
140
141
|
open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
|
141
142
|
|
143
|
+
IndiferentHash.setup(open_options)
|
144
|
+
|
142
145
|
Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
|
143
146
|
|
144
147
|
max_items = 2
|
@@ -167,6 +170,7 @@ module BioMart
|
|
167
170
|
end
|
168
171
|
|
169
172
|
open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
|
173
|
+
|
170
174
|
if filename.nil?
|
171
175
|
results = TSV.open data, open_options
|
172
176
|
results.key_field = main
|
data/lib/rbbt/sources/ensembl_ftp.rb
CHANGED
@@ -52,24 +52,35 @@ module Ensembl
|
|
52
52
|
File.join("ftp://" + SERVER, ftp_directory_for(organism) )
|
53
53
|
end
|
54
54
|
|
55
|
-
def self.url_for(organism, table)
|
56
|
-
|
55
|
+
def self.url_for(organism, table, extension)
|
56
|
+
File.join(base_url(organism), table) + ".#{extension}.gz"
|
57
|
+
end
|
58
|
+
|
59
|
+
def self._get_gz(url)
|
60
|
+
begin
|
61
|
+
CMD.cmd("wget '#{url}' -O - | gunzip").read
|
62
|
+
rescue
|
63
|
+
CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def self._get_file(organism, table, extension)
|
68
|
+
url = url_for(organism, table, extension)
|
69
|
+
self._get_gz(url)
|
57
70
|
end
|
58
71
|
|
59
72
|
def self.has_table?(organism, table)
|
60
|
-
sql_file =
|
73
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
61
74
|
! sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm).nil?
|
62
75
|
end
|
63
76
|
|
64
77
|
def self.fields_for(organism, table)
|
65
|
-
sql_file =
|
66
|
-
|
78
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
67
79
|
chunk = sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm)[1]
|
68
80
|
chunk.scan(/^\s+`(.*?)`/).flatten
|
69
81
|
end
|
70
82
|
|
71
83
|
def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
|
72
|
-
url = url_for(organism, table)
|
73
84
|
if key_field and fields
|
74
85
|
all_fields = fields_for(organism, table)
|
75
86
|
key_pos = all_fields.index key_field
|
@@ -78,7 +89,8 @@ module Ensembl
|
|
78
89
|
options[:key_field] = key_pos
|
79
90
|
options[:fields] = field_pos
|
80
91
|
end
|
81
|
-
|
92
|
+
|
93
|
+
tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
|
82
94
|
tsv.key_field = key_field
|
83
95
|
tsv.fields = fields
|
84
96
|
tsv
|
data/lib/rbbt/sources/organism.rb
CHANGED
@@ -88,8 +88,10 @@ module Organism
|
|
88
88
|
end
|
89
89
|
when "Mmu"
|
90
90
|
"mm10"
|
91
|
+
when "Rno"
|
92
|
+
"rn6"
|
91
93
|
else
|
92
|
-
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
94
|
+
raise "Only organism 'Hsa' (Homo sapiens), 'Rno' (Rattus norvegicus), and Mmu (Mus musculus) supported"
|
93
95
|
end
|
94
96
|
end
|
95
97
|
|
@@ -117,6 +119,8 @@ module Organism
|
|
117
119
|
end
|
118
120
|
when "Mmu"
|
119
121
|
"GRCm38"
|
122
|
+
when "Rno"
|
123
|
+
"Rnor_6.0"
|
120
124
|
else
|
121
125
|
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
122
126
|
end
|
@@ -145,12 +149,12 @@ module Organism
|
|
145
149
|
new_positions = {}
|
146
150
|
|
147
151
|
TmpFile.with_file(positions_bed) do |source_bed|
|
148
|
-
TmpFile.with_file
|
149
|
-
TmpFile.with_file
|
152
|
+
TmpFile.with_file do |unmapped_file|
|
153
|
+
TmpFile.with_file do |map_file|
|
150
154
|
|
151
155
|
|
152
156
|
Open.write(map_file, Open.read(map_url))
|
153
|
-
new_mutations = TmpFile.with_file
|
157
|
+
new_mutations = TmpFile.with_file do |target_bed|
|
154
158
|
FileUtils.chmod(755, Rbbt.software.opt.bin.liftOver.produce.find)
|
155
159
|
CMD.cmd("#{Rbbt.software.opt.bin.liftOver.find} '#{source_bed}' '#{map_file}' '#{target_bed}' '#{unmapped_file}'").read
|
156
160
|
Open.read(target_bed) do |line|
|
data/lib/rbbt/sources/tfacts.rb
CHANGED
@@ -2,15 +2,15 @@ require 'rbbt'
|
|
2
2
|
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/resource'
|
4
4
|
|
5
|
-
module
|
5
|
+
module TFactS
|
6
6
|
extend Resource
|
7
|
-
self.subdir = "share/databases/
|
7
|
+
self.subdir = "share/databases/TFactS"
|
8
8
|
|
9
|
-
|
9
|
+
TFactS.claim TFactS[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
|
10
10
|
|
11
|
-
|
11
|
+
TFactS.claim TFactS.targets, :proc do
|
12
12
|
require 'spreadsheet'
|
13
|
-
book = Spreadsheet.open
|
13
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
14
14
|
sheet = book.worksheet 0
|
15
15
|
|
16
16
|
tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)"], :namespace => "Hsa", :type => :flat)
|
@@ -24,9 +24,9 @@ module TFacts
|
|
24
24
|
tsv.to_s
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
TFactS.claim TFactS.targets_signed, :proc do
|
28
28
|
require 'spreadsheet'
|
29
|
-
book = Spreadsheet.open
|
29
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
30
30
|
sheet = book.worksheet 1
|
31
31
|
|
32
32
|
tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)", "Sign", "PMID"], :namespace => "Hsa", :type => :double)
|
@@ -43,13 +43,13 @@ module TFacts
|
|
43
43
|
tsv.to_s
|
44
44
|
end
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
TFactS.claim TFactS.regulators, :proc do
|
47
|
+
TFactS.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
|
48
48
|
end
|
49
49
|
|
50
|
-
|
50
|
+
TFactS.claim TFactS.tf_tg, :proc do
|
51
51
|
require 'spreadsheet'
|
52
|
-
book = Spreadsheet.open
|
52
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
53
53
|
|
54
54
|
tsv = TSV.setup({}, :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)", "Sign", "Species", "Source", "PMID"], :namespace => "Hsa", :type => :double)
|
55
55
|
|
@@ -128,16 +128,16 @@ if defined? Entity and defined? Gene and Entity === Gene
|
|
128
128
|
|
129
129
|
module Gene
|
130
130
|
property :is_transcription_factor? => :array2single do
|
131
|
-
tfs =
|
131
|
+
tfs = TFactS.targets.keys
|
132
132
|
self.name.collect{|gene| tfs.include? gene}
|
133
133
|
end
|
134
134
|
|
135
135
|
property :transcription_regulators => :array2single do
|
136
|
-
Gene.setup(
|
136
|
+
Gene.setup(TFactS.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
|
137
137
|
end
|
138
138
|
|
139
139
|
property :transcription_targets => :array2single do
|
140
|
-
Gene.setup(
|
140
|
+
Gene.setup(TFactS.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
|
141
141
|
end
|
142
142
|
end
|
143
143
|
end
|
data/share/Ensembl/release_dates
CHANGED
data/share/install/Organism/Hsa/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [9606]
|
7
7
|
$scientific_name = "Homo sapiens"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "hsapiens_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'hsapiens_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'hsapiens_snp'
|
data/share/install/Organism/Mmu/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [10090]
|
7
7
|
$scientific_name = "Mus musculus"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "mmusculus_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'mmusculus_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'mmusculus_snp'
|
data/share/install/Organism/Rno/Rakefile
CHANGED
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
|
|
9
9
|
$biomart_db = 'rnorvegicus_gene_ensembl'
|
10
10
|
$biomart_db_germline_variation = 'rnorvegicus_snp'
|
11
11
|
$biomart_db_somatic_variation = 'rnorvegicus_snp_som'
|
12
|
-
$ortholog_key = "
|
12
|
+
$ortholog_key = "rnorvegicus_homolog_ensembl_gene"
|
13
13
|
|
14
14
|
$biomart_lexicon = [
|
15
15
|
[ 'Associated Gene Name' , "external_gene_id"],
|
@@ -20,6 +20,7 @@ $biomart_lexicon = [
|
|
20
20
|
|
21
21
|
$biomart_identifiers = [
|
22
22
|
['Entrez Gene ID', "entrezgene"],
|
23
|
+
['Ensembl Protein ID', "ensembl_peptide_id" ],
|
23
24
|
['Associated Gene Name' , "rgd_symbol"],
|
24
25
|
['Protein ID' , "protein_id"] ,
|
25
26
|
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
data/share/install/Organism/organism_helpers.rb
CHANGED
@@ -547,13 +547,13 @@ end
|
|
547
547
|
rule /^possible_ortholog_(.*)/ do |t|
|
548
548
|
other = t.name.match(/ortholog_(.*)/)[1]
|
549
549
|
other_key = Organism.ortholog_key(other).produce.read
|
550
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :
|
550
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
551
551
|
end
|
552
552
|
|
553
553
|
rule /^ortholog_(.*)/ do |t|
|
554
554
|
other = t.name.match(/ortholog_(.*)/)[1]
|
555
555
|
other_key = Organism.ortholog_key(other).produce.read
|
556
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :
|
556
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
557
557
|
end
|
558
558
|
|
559
559
|
rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
@@ -728,13 +728,18 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
|
|
728
728
|
transcript_sequence.through do |transcript, sequence|
|
729
729
|
protein = transcript_protein[transcript]
|
730
730
|
next if protein.nil? or protein.empty?
|
731
|
+
|
731
732
|
utr5 = transcript_5utr[transcript]
|
732
733
|
utr3 = transcript_3utr[transcript]
|
733
734
|
phase = transcript_phase[transcript] || 0
|
735
|
+
|
734
736
|
if phase < 0
|
735
|
-
utr5
|
737
|
+
if utr5.nil? || utr5 == 0 || utr5 == "0"
|
738
|
+
utr5 = 0
|
739
|
+
end
|
736
740
|
phase = 0
|
737
741
|
end
|
742
|
+
|
738
743
|
psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
|
739
744
|
protein_sequence[protein]=psequence
|
740
745
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.50
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|