rbbt-sources 3.1.30 → 3.1.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/xena_hubs +2 -0
- data/lib/rbbt/sources/Xena.rb +109 -0
- data/lib/rbbt/sources/organism.rb +16 -0
- data/share/install/Organism/organism_helpers.rb +3 -2
- data/test/rbbt/sources/test_organism.rb +4 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c4f4b556028ce477969e38d0b4a1d8a05b01305
|
4
|
+
data.tar.gz: 208e92cf212d96dc3333000d350e598ec45f6361
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1f3e8855b0a233a6d97b80193a92fd90c45e1bfefb0f5b074a4aad2ec7151d9de1dd1c1eae842a84a9f15aeb436af4b97330845ec4ee8defe03735e65de1037f
|
7
|
+
data.tar.gz: a3f529915268806ac18920cfea3786ec2ab906b8492f3f8d6fd7786480a07bf1b17d6b4f5963a6175a1ff231a1697e53099fda66628e5d1fea9f7d525d7cf44e
|
data/etc/xena_hubs
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
require 'rest-client'
|
4
|
+
|
5
|
+
module Xena
|
6
|
+
extend Resource
|
7
|
+
self.subdir = 'share/databases/Xena'
|
8
|
+
|
9
|
+
DEFAULT_XENA_HUB = "https://ucscpublic.xenahubs.net"
|
10
|
+
TCGA_HUB = "https://tcga.xenahubs.net/"
|
11
|
+
#def self.organism(org="Hsa")
|
12
|
+
# Organism.default_code(org)
|
13
|
+
#end
|
14
|
+
|
15
|
+
#self.search_paths = {}
|
16
|
+
#self.search_paths[:default] = :lib
|
17
|
+
|
18
|
+
# Sends a query to a Xena hub and returns the decoded reply.
#
# The query text is POSTed verbatim, as text/plain, to the "data/" endpoint
# under the hub URL.
#
# query:: query text used as the request body
# hub::   base URL of the hub (defaults to DEFAULT_XENA_HUB)
#
# Returns the response body parsed with JSON.parse.
def self.query(query, hub = DEFAULT_XENA_HUB)
  uri = URI.parse(File.join(hub, 'data/'))

  http = Net::HTTP.new(uri.host, uri.port)
  # Follow the scheme of the hub URL: forcing TLS on makes every plain
  # "http://" hub fail during the handshake.
  http.use_ssl = uri.scheme == 'https'

  request = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'text/plain')
  request.body = query

  JSON.parse(http.request(request).body)
end
|
32
|
+
|
33
|
+
# Opens a file from a Xena hub's "download" area as a TSV.
#
# file:: name of the file under the hub's download/ path
# hub::  base URL of the hub (defaults to DEFAULT_XENA_HUB)
#
# Returns the result of TSV.open on the download URL.
def self.tsv(file, hub = DEFAULT_XENA_HUB)
  # Join the pieces the same way Xena.query does, so a hub URL that already
  # ends in "/" (like TCGA_HUB) does not produce "//download/...".
  url = File.join(hub, 'download', file)
  TSV.open(url, :monitor => true, :header_hash => '')
end
|
37
|
+
|
38
|
+
# Asks a Xena hub for its cohorts.
#
# The hub-side query selects the distinct values of the dataset table's
# cohort column, substituting "(unassigned)" where the column is null.
#
# hub:: base URL of the hub (defaults to DEFAULT_XENA_HUB)
#
# Returns whatever Xena.query decodes from the hub's reply.
def self.cohorts(hub = DEFAULT_XENA_HUB)
  # Sent to the hub as-is; the text is kept exactly as written.
  all_cohorts = <<-EOF
(;allCohorts
(fn []
(map :cohort
(query
{:select [[#sql/call [:distinct #sql/call [:ifnull :cohort "(unassigned)"]] :cohort]]
:from [:dataset]})))
)
  EOF

  query(all_cohorts, hub)
end
|
51
|
+
|
52
|
+
# Asks a Xena hub for the datasets that belong to one cohort.
#
# The hub-side query selects name, longtitle, count, type, datasubtype,
# probemap, text, status and the probemap's text for every dataset whose
# cohort is in the given list (here a single cohort).
#
# cohort:: cohort name; converted with to_s before being embedded
# hub::    base URL of the hub (defaults to DEFAULT_XENA_HUB)
#
# Returns whatever Xena.query decodes from the hub's reply.
def self.cohort_dataset(cohort, hub = DEFAULT_XENA_HUB)
  # The name lands inside a double-quoted string in the query text, so a raw
  # '"' or '\' in it would end the string early or change the query.
  # Backslash-escape both before embedding.
  quoted_cohort = cohort.to_s.gsub(/[\\"]/) { |char| "\\#{char}" }

  # Sent to the hub as-is apart from the interpolated cohort name.
  dataset_list = <<-EOF
(; datasetList
(fn [cohorts]
(let [count-table {:select [[:dataset.name :dname] [:%count.value :count]]
:from [:dataset]
:join [:field [:= :dataset.id :dataset_id]
:code [:= :field.id :field_id]]
:group-by [:dataset.name]
:where [:= :field.name "sampleID"]}]
(query {:select [:d.name :d.longtitle :count :d.type :d.datasubtype :d.probemap :d.text :d.status [:pm-dataset.text :pmtext]]
:from [[:dataset :d]]
:left-join [[:dataset :pm-dataset] [:= :pm-dataset.name :d.probemap]
count-table [:= :dname :d.name]]
:where [:in :d.cohort cohorts]})
))
["#{ quoted_cohort }"])
  EOF

  query(dataset_list, hub)
end
|
73
|
+
|
74
|
+
|
75
|
+
Xena.claim Xena.data, :proc do
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
if __FILE__ == $0
  # Manual check, run only when this file is executed directly: prints every
  # cohort of the TCGA hub followed by the name, title and type of each of
  # its datasets.
  #
  # The debugging halt that used to follow this loop, and the statements
  # behind it that could never run, have been dropped.
  Xena.cohorts(Xena::TCGA_HUB).each do |cohort|
    puts Log.color(:magenta, cohort)

    Xena.cohort_dataset(cohort, Xena::TCGA_HUB).each do |info|
      puts Log.color(:blue, "Name: ") + Log.color(:yellow, info["name"].to_s)
      puts Log.color(:blue, "Title: ") + info["longtitle"].to_s
      puts Log.color(:blue, "Type: ") + info["type"].to_s
      puts
    end

    puts
  end
end
|
109
|
+
|
@@ -259,4 +259,20 @@ module Organism
|
|
259
259
|
|
260
260
|
Misc.total_length(exon_ranges)
|
261
261
|
end
|
262
|
+
|
263
|
+
# Maps each chromosome of an organism to the size of its chromosome file.
#
# Every file matching "chromosome_*" under Organism[organism] contributes one
# entry. The size is a byte count: File.size for plain files, or the output
# of `zcat | wc -c` for files Open reports as gzip/bgzip.
#
# organism:: organism code (defaults to Organism.default_code("Hsa"))
#
# Returns a Hash of chromosome name => Integer size.
def self.chromosome_sizes(organism = Organism.default_code("Hsa"))
  Organism[organism].glob_all("chromosome_*").each_with_object({}) do |file, sizes|
    # Derive the name from the basename by removing the "chromosome_" prefix
    # and a trailing compression extension. Splitting on "_" and "." instead
    # truncated names that contain those characters ("HSCHR6_MHC_COX" became
    # "COX", "GL000191.1" became "GL000191") and could collapse several files
    # onto one key.
    # NOTE(review): assumes compressed files end in .gz or .bgz -- confirm
    # against what Open.gzip? / Open.bgzip? accept.
    chromosome = File.basename(file).sub(/\Achromosome_/, '').sub(/\.b?gz\z/, '')

    size = if Open.gzip?(file) || Open.bgzip?(file)
             CMD.cmd("zcat '#{ file }' | wc -c ").read
           else
             File.size(file)
           end

    sizes[chromosome] = size.to_i
  end
end
|
262
278
|
end
|
@@ -573,12 +573,13 @@ end
|
|
573
573
|
#{{{ Special files
|
574
574
|
require 'bio'
|
575
575
|
|
576
|
-
file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
576
|
+
file 'transcript_sequence' => ["exons", "transcript_exons", "blacklist_chromosomes"] do |t|
|
577
577
|
exon_info = TSV.open('exons', :type => :list, :fields => ["Exon Strand", "Exon Chr Start", "Exon Chr End", "Chromosome Name"], :unnamed => true)
|
578
578
|
|
579
579
|
chr_transcript_ranges ||= {}
|
580
580
|
transcript_strand = {}
|
581
581
|
|
582
|
+
blacklist_chromosomes = Path.setup(File.expand_path('blacklist_chromosomes')).list
|
582
583
|
transcript_exons = Path.setup(File.expand_path('transcript_exons'))
|
583
584
|
TSV.traverse transcript_exons do |transcript,values|
|
584
585
|
transcript = transcript.first if Array === transcript
|
@@ -604,7 +605,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
|
|
604
605
|
transcript_sequence = {}
|
605
606
|
chr_transcript_ranges.each do |chr, transcript_ranges|
|
606
607
|
begin
|
607
|
-
raise "LRG, GL, HG, NT, KI, and HSCHR chromosomes not supported: #{chr}" if chr
|
608
|
+
raise "LRG, GL, HG, NT, KI, and HSCHR chromosomes not supported: #{chr}" if blacklist_chromosomes.include? chr
|
608
609
|
p = File.expand_path("./chromosome_#{chr}")
|
609
610
|
Organism.root.annotate p
|
610
611
|
p.sub!(%r{.*/organisms/},'share/organisms/')
|
@@ -66,6 +66,10 @@ class TestOrganism < Test::Unit::TestCase
|
|
66
66
|
assert_equal ["ENSG00000133703"], Gene.setup("Kras", "Associated Gene Name", "Mmu/jun2011").ensembl.ortholog(Organism.default_code("Hsa"))
|
67
67
|
end
|
68
68
|
|
69
|
+
# Chromosome "2" of the default organism must report a size above ten million.
def test_chr_sizes
  chr2_size = Organism.chromosome_sizes["2"].to_i
  assert chr2_size > 10_000_000
end
|
72
|
+
|
69
73
|
#def test_genes_at_chromosome
|
70
74
|
# pos = [12, 117799500]
|
71
75
|
# assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.30
|
4
|
+
version: 3.1.32
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -89,6 +89,7 @@ files:
|
|
89
89
|
- etc/allowed_biomart_archives
|
90
90
|
- etc/biomart/missing_in_archive
|
91
91
|
- etc/organisms
|
92
|
+
- etc/xena_hubs
|
92
93
|
- lib/rbbt/sources/CASCADE.rb
|
93
94
|
- lib/rbbt/sources/COREAD_phospho_proteome.rb
|
94
95
|
- lib/rbbt/sources/COSTART.rb
|
@@ -101,6 +102,7 @@ files:
|
|
101
102
|
- lib/rbbt/sources/PRO.rb
|
102
103
|
- lib/rbbt/sources/PSI_MI.rb
|
103
104
|
- lib/rbbt/sources/STITCH.rb
|
105
|
+
- lib/rbbt/sources/Xena.rb
|
104
106
|
- lib/rbbt/sources/array_express.rb
|
105
107
|
- lib/rbbt/sources/barcode.rb
|
106
108
|
- lib/rbbt/sources/bibtex.rb
|