rbbt-sources 3.1.45 → 3.1.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/biomart/missing_in_archive +4 -0
- data/lib/rbbt/sources/biomart.rb +4 -0
- data/lib/rbbt/sources/ensembl_ftp.rb +19 -7
- data/lib/rbbt/sources/organism.rb +8 -4
- data/lib/rbbt/sources/tfacts.rb +14 -14
- data/share/Ensembl/release_dates +5 -0
- data/share/install/Organism/Hsa/Rakefile +1 -1
- data/share/install/Organism/Mmu/Rakefile +1 -1
- data/share/install/Organism/Rno/Rakefile +2 -1
- data/share/install/Organism/organism_helpers.rb +8 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76650eb77129f5a072416944c24e3155e507ffe75f79372c505d62b919899e48
|
4
|
+
data.tar.gz: 70bf96eb1fa599f121cf65f40388aae545c4a6bb03ca006bc24f4ec83c78175d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc1fc3dd34681ee50feaff3a447b98abf484eb5ed2775804e6aa2268e14dede03e16970ed7c5bfe5c5015b081514cb3309f178cbca973434848f83368ba27778
|
7
|
+
data.tar.gz: 1ce0587059951ee335cf08f3cf22ceb24c0b0deaaf9798689be141a53871934e1e679efbf2faf578fb10bf3088386539204cecc121a68becddedcdb0aad988c5
|
data/lib/rbbt/sources/biomart.rb
CHANGED
@@ -134,11 +134,14 @@ module BioMart
|
|
134
134
|
# cause an error if the BioMart WS does not allow filtering with that
|
135
135
|
# attribute.
|
136
136
|
def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
|
137
|
+
IndiferentHash.setup(open_options)
|
137
138
|
open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
|
138
139
|
filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
|
139
140
|
attrs ||= []
|
140
141
|
open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
|
141
142
|
|
143
|
+
IndiferentHash.setup(open_options)
|
144
|
+
|
142
145
|
Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
|
143
146
|
|
144
147
|
max_items = 2
|
@@ -167,6 +170,7 @@ module BioMart
|
|
167
170
|
end
|
168
171
|
|
169
172
|
open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
|
173
|
+
|
170
174
|
if filename.nil?
|
171
175
|
results = TSV.open data, open_options
|
172
176
|
results.key_field = main
|
@@ -52,24 +52,35 @@ module Ensembl
|
|
52
52
|
File.join("ftp://" + SERVER, ftp_directory_for(organism) )
|
53
53
|
end
|
54
54
|
|
55
|
-
def self.url_for(organism, table)
|
56
|
-
|
55
|
+
def self.url_for(organism, table, extension)
|
56
|
+
File.join(base_url(organism), table) + ".#{extension}.gz"
|
57
|
+
end
|
58
|
+
|
59
|
+
def self._get_gz(url)
|
60
|
+
begin
|
61
|
+
CMD.cmd("wget '#{url}' -O - | gunzip").read
|
62
|
+
rescue
|
63
|
+
CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def self._get_file(organism, table, extension)
|
68
|
+
url = url_for(organism, table, extension)
|
69
|
+
self._get_gz(url)
|
57
70
|
end
|
58
71
|
|
59
72
|
def self.has_table?(organism, table)
|
60
|
-
sql_file =
|
73
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
61
74
|
! sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm).nil?
|
62
75
|
end
|
63
76
|
|
64
77
|
def self.fields_for(organism, table)
|
65
|
-
sql_file =
|
66
|
-
|
78
|
+
sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
|
67
79
|
chunk = sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm)[1]
|
68
80
|
chunk.scan(/^\s+`(.*?)`/).flatten
|
69
81
|
end
|
70
82
|
|
71
83
|
def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
|
72
|
-
url = url_for(organism, table)
|
73
84
|
if key_field and fields
|
74
85
|
all_fields = fields_for(organism, table)
|
75
86
|
key_pos = all_fields.index key_field
|
@@ -78,7 +89,8 @@ module Ensembl
|
|
78
89
|
options[:key_field] = key_pos
|
79
90
|
options[:fields] = field_pos
|
80
91
|
end
|
81
|
-
|
92
|
+
|
93
|
+
tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
|
82
94
|
tsv.key_field = key_field
|
83
95
|
tsv.fields = fields
|
84
96
|
tsv
|
@@ -88,8 +88,10 @@ module Organism
|
|
88
88
|
end
|
89
89
|
when "Mmu"
|
90
90
|
"mm10"
|
91
|
+
when "Rno"
|
92
|
+
"rn6"
|
91
93
|
else
|
92
|
-
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
94
|
+
raise "Only organism 'Hsa' (Homo sapiens), 'Rno' (Rattus norvegicus), and Mmu (Mus musculus) supported"
|
93
95
|
end
|
94
96
|
end
|
95
97
|
|
@@ -117,6 +119,8 @@ module Organism
|
|
117
119
|
end
|
118
120
|
when "Mmu"
|
119
121
|
"GRCm38"
|
122
|
+
when "Rno"
|
123
|
+
"Rnor_6.0"
|
120
124
|
else
|
121
125
|
raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
|
122
126
|
end
|
@@ -145,12 +149,12 @@ module Organism
|
|
145
149
|
new_positions = {}
|
146
150
|
|
147
151
|
TmpFile.with_file(positions_bed) do |source_bed|
|
148
|
-
TmpFile.with_file
|
149
|
-
TmpFile.with_file
|
152
|
+
TmpFile.with_file do |unmapped_file|
|
153
|
+
TmpFile.with_file do |map_file|
|
150
154
|
|
151
155
|
|
152
156
|
Open.write(map_file, Open.read(map_url))
|
153
|
-
new_mutations = TmpFile.with_file
|
157
|
+
new_mutations = TmpFile.with_file do |target_bed|
|
154
158
|
FileUtils.chmod(755, Rbbt.software.opt.bin.liftOver.produce.find)
|
155
159
|
CMD.cmd("#{Rbbt.software.opt.bin.liftOver.find} '#{source_bed}' '#{map_file}' '#{target_bed}' '#{unmapped_file}'").read
|
156
160
|
Open.read(target_bed) do |line|
|
data/lib/rbbt/sources/tfacts.rb
CHANGED
@@ -2,15 +2,15 @@ require 'rbbt'
|
|
2
2
|
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/resource'
|
4
4
|
|
5
|
-
module
|
5
|
+
module TFactS
|
6
6
|
extend Resource
|
7
|
-
self.subdir = "share/databases/
|
7
|
+
self.subdir = "share/databases/TFactS"
|
8
8
|
|
9
|
-
|
9
|
+
TFactS.claim TFactS[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
|
10
10
|
|
11
|
-
|
11
|
+
TFactS.claim TFactS.targets, :proc do
|
12
12
|
require 'spreadsheet'
|
13
|
-
book = Spreadsheet.open
|
13
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
14
14
|
sheet = book.worksheet 0
|
15
15
|
|
16
16
|
tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)"], :namespace => "Hsa", :type => :flat)
|
@@ -24,9 +24,9 @@ module TFacts
|
|
24
24
|
tsv.to_s
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
TFactS.claim TFactS.targets_signed, :proc do
|
28
28
|
require 'spreadsheet'
|
29
|
-
book = Spreadsheet.open
|
29
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
30
30
|
sheet = book.worksheet 1
|
31
31
|
|
32
32
|
tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)", "Sign", "PMID"], :namespace => "Hsa", :type => :double)
|
@@ -43,13 +43,13 @@ module TFacts
|
|
43
43
|
tsv.to_s
|
44
44
|
end
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
TFactS.claim TFactS.regulators, :proc do
|
47
|
+
TFactS.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
|
48
48
|
end
|
49
49
|
|
50
|
-
|
50
|
+
TFactS.claim TFactS.tf_tg, :proc do
|
51
51
|
require 'spreadsheet'
|
52
|
-
book = Spreadsheet.open
|
52
|
+
book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
|
53
53
|
|
54
54
|
tsv = TSV.setup({}, :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)", "Sign", "Species", "Source", "PMID"], :namespace => "Hsa", :type => :double)
|
55
55
|
|
@@ -128,16 +128,16 @@ if defined? Entity and defined? Gene and Entity === Gene
|
|
128
128
|
|
129
129
|
module Gene
|
130
130
|
property :is_transcription_factor? => :array2single do
|
131
|
-
tfs =
|
131
|
+
tfs = TFactS.targets.keys
|
132
132
|
self.name.collect{|gene| tfs.include? gene}
|
133
133
|
end
|
134
134
|
|
135
135
|
property :transcription_regulators => :array2single do
|
136
|
-
Gene.setup(
|
136
|
+
Gene.setup(TFactS.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
|
137
137
|
end
|
138
138
|
|
139
139
|
property :transcription_targets => :array2single do
|
140
|
-
Gene.setup(
|
140
|
+
Gene.setup(TFactS.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
|
141
141
|
end
|
142
142
|
end
|
143
143
|
end
|
data/share/Ensembl/release_dates
CHANGED
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [9606]
|
7
7
|
$scientific_name = "Homo sapiens"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "hsapiens_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'hsapiens_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'hsapiens_snp'
|
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
|
|
5
5
|
|
6
6
|
$taxs = [10090]
|
7
7
|
$scientific_name = "Mus musculus"
|
8
|
-
$ortholog_key = "
|
8
|
+
$ortholog_key = "mmusculus_homolog_ensembl_gene"
|
9
9
|
|
10
10
|
$biomart_db = 'mmusculus_gene_ensembl'
|
11
11
|
$biomart_db_germline_variation = 'mmusculus_snp'
|
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
|
|
9
9
|
$biomart_db = 'rnorvegicus_gene_ensembl'
|
10
10
|
$biomart_db_germline_variation = 'rnorvegicus_snp'
|
11
11
|
$biomart_db_somatic_variation = 'rnorvegicus_snp_som'
|
12
|
-
$ortholog_key = "
|
12
|
+
$ortholog_key = "rnorvegicus_homolog_ensembl_gene"
|
13
13
|
|
14
14
|
$biomart_lexicon = [
|
15
15
|
[ 'Associated Gene Name' , "external_gene_id"],
|
@@ -20,6 +20,7 @@ $biomart_lexicon = [
|
|
20
20
|
|
21
21
|
$biomart_identifiers = [
|
22
22
|
['Entrez Gene ID', "entrezgene"],
|
23
|
+
['Ensembl Protein ID', "ensembl_peptide_id" ],
|
23
24
|
['Associated Gene Name' , "rgd_symbol"],
|
24
25
|
['Protein ID' , "protein_id"] ,
|
25
26
|
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
@@ -547,13 +547,13 @@ end
|
|
547
547
|
rule /^possible_ortholog_(.*)/ do |t|
|
548
548
|
other = t.name.match(/ortholog_(.*)/)[1]
|
549
549
|
other_key = Organism.ortholog_key(other).produce.read
|
550
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :
|
550
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
551
551
|
end
|
552
552
|
|
553
553
|
rule /^ortholog_(.*)/ do |t|
|
554
554
|
other = t.name.match(/ortholog_(.*)/)[1]
|
555
555
|
other_key = Organism.ortholog_key(other).produce.read
|
556
|
-
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :
|
556
|
+
BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
|
557
557
|
end
|
558
558
|
|
559
559
|
rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
|
@@ -728,13 +728,18 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
|
|
728
728
|
transcript_sequence.through do |transcript, sequence|
|
729
729
|
protein = transcript_protein[transcript]
|
730
730
|
next if protein.nil? or protein.empty?
|
731
|
+
|
731
732
|
utr5 = transcript_5utr[transcript]
|
732
733
|
utr3 = transcript_3utr[transcript]
|
733
734
|
phase = transcript_phase[transcript] || 0
|
735
|
+
|
734
736
|
if phase < 0
|
735
|
-
utr5
|
737
|
+
if utr5.nil? || utr5 == 0 || utr5 == "0"
|
738
|
+
utr5 = 0
|
739
|
+
end
|
736
740
|
phase = 0
|
737
741
|
end
|
742
|
+
|
738
743
|
psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
|
739
744
|
protein_sequence[protein]=psequence
|
740
745
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.50
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|