rbbt-sources 3.1.30 → 3.1.32

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7da6b2938f54788ddcd4ee03b78e8e8b237bf478
4
- data.tar.gz: fc847e1ddf84cb477b4f07ac8b33a393009e72ac
3
+ metadata.gz: 0c4f4b556028ce477969e38d0b4a1d8a05b01305
4
+ data.tar.gz: 208e92cf212d96dc3333000d350e598ec45f6361
5
5
  SHA512:
6
- metadata.gz: d2a2261e40edd858ca483b269d409a95e48f61acfeb07c9b31e2381e5bcffa619c06c6c5afaa4df05ba91405638bbaa94897722c66ef1610344b542746c2aa61
7
- data.tar.gz: 7bd00cb785efd5db78ba14607436e42465055da71119d27741f20b7c7de4b791b90c321aaecc1de1d681c9d5ba75823401d03fd5da247d7a187dec56a27c4fd3
6
+ metadata.gz: 1f3e8855b0a233a6d97b80193a92fd90c45e1bfefb0f5b074a4aad2ec7151d9de1dd1c1eae842a84a9f15aeb436af4b97330845ec4ee8defe03735e65de1037f
7
+ data.tar.gz: a3f529915268806ac18920cfea3786ec2ab906b8492f3f8d6fd7786480a07bf1b17d6b4f5963a6175a1ff231a1697e53099fda66628e5d1fea9f7d525d7cf44e
data/etc/xena_hubs ADDED
@@ -0,0 +1,2 @@
1
+ UCSC public hub (not in my data hubs): https://ucscpublic.xenahubs.net
2
+ TCGA hub: https://tcga.xenahubs.net
@@ -0,0 +1,109 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+ require 'rest-client'
4
+
5
# Client helpers for querying Xena data hubs (such as the UCSC public hub and
# the TCGA hub) and for downloading their files as TSV.
module Xena
  extend Resource
  self.subdir = 'share/databases/Xena'

  DEFAULT_XENA_HUB = "https://ucscpublic.xenahubs.net"
  TCGA_HUB = "https://tcga.xenahubs.net/"
  #def self.organism(org="Hsa")
  #  Organism.default_code(org)
  #end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # POST a query to the `data/` endpoint of a hub and return the parsed JSON
  # response.
  #
  # query - String with the query text, sent verbatim as a text/plain body.
  # hub   - Base URL of the hub (with or without a trailing slash).
  #
  # Raises RuntimeError when the hub does not answer with a 2xx status, so a
  # failed request is reported as such rather than as a JSON parse error.
  def self.query(query, hub = DEFAULT_XENA_HUB)
    uri = URI.parse(File.join(hub, 'data/'))

    http = Net::HTTP.new(uri.host, uri.port)
    # Only negotiate TLS when the hub URL asks for it, so plain http hubs work.
    http.use_ssl = uri.scheme == 'https'

    request = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'text/plain')
    request.body = query
    response = http.request(request)

    unless Net::HTTPSuccess === response
      raise "Xena query to #{uri} failed: #{response.code} #{response.message}"
    end

    JSON.parse(response.body)
  end

  # Open a file from the `download/` area of a hub as a TSV.
  #
  # file - Name of the file below `<hub>/download/`.
  # hub  - Base URL of the hub (with or without a trailing slash).
  def self.tsv(file, hub = DEFAULT_XENA_HUB)
    # File.join avoids a doubled slash when the hub URL already ends in '/'.
    url = File.join(hub, 'download', file)
    TSV.open(url, :monitor => true, :header_hash => '')
  end

  # Ask a hub for the distinct cohort values of its datasets; datasets with no
  # cohort are reported as "(unassigned)". Returns the parsed JSON response.
  def self.cohorts(hub = DEFAULT_XENA_HUB)
    query =<<-EOF
(;allCohorts
  (fn []
    (map :cohort
      (query
        {:select [[#sql/call [:distinct #sql/call [:ifnull :cohort "(unassigned)"]] :cohort]]
         :from [:dataset]})))
)
    EOF

    self.query query, hub
  end

  # Ask a hub for the datasets that belong to a cohort. Returns the parsed
  # JSON response; the query selects name, longtitle, count, type,
  # datasubtype, probemap, text, status and pmtext for each dataset.
  #
  # cohort - Cohort name to look up.
  # hub    - Base URL of the hub.
  def self.cohort_dataset(cohort, hub = DEFAULT_XENA_HUB)
    # The cohort is embedded in a double-quoted literal of the query text, so
    # backslashes and double quotes in it must be escaped to keep it intact.
    escaped_cohort = cohort.to_s.gsub(/[\\"]/) { |char| "\\#{char}" }

    query =<<-EOF
(; datasetList
  (fn [cohorts]
    (let [count-table {:select [[:dataset.name :dname] [:%count.value :count]]
                       :from [:dataset]
                       :join [:field [:= :dataset.id :dataset_id]
                              :code [:= :field.id :field_id]]
                       :group-by [:dataset.name]
                       :where [:= :field.name "sampleID"]}]
      (query {:select [:d.name :d.longtitle :count :d.type :d.datasubtype :d.probemap :d.text :d.status [:pm-dataset.text :pmtext]]
              :from [[:dataset :d]]
              :left-join [[:dataset :pm-dataset] [:= :pm-dataset.name :d.probemap]
                          count-table [:= :dname :d.name]]
              :where [:in :d.cohort cohorts]})
    ))
  ["#{ escaped_cohort }"])
    EOF

    self.query(query, hub)
  end


  Xena.claim Xena.data, :proc do
  end
end
78
+
79
# Ad-hoc exploration script, only executed when this file is run directly.
if $0 == __FILE__

  tcga_hub = Xena::TCGA_HUB

  # Print every cohort of the TCGA hub followed by a summary of its datasets.
  Xena.cohorts(tcga_hub).each do |cohort_name|
    puts Log.color :magenta, cohort_name

    Xena.cohort_dataset(cohort_name, tcga_hub).each do |dataset|
      puts Log.color(:blue, "Name: ") + Log.color(:yellow, dataset["name"].to_s)
      [["Title: ", "longtitle"], ["Type: ", "type"]].each do |label, key|
        puts Log.color(:blue, label) + dataset[key].to_s
      end
      puts
    end

    puts
  end

  # NOTE(review): this unconditional raise aborts the script here, so none of
  # the statements below are ever reached.
  raise "STOP"

  query_str =<<-EOF
(;allCohorts
  (fn []
    (map :cohort
      (query
        {:select [[#sql/call [:distinct #sql/call [:ifnull :cohort "(unassigned)"]] :cohort]]
         :from [:dataset]}))))
  EOF

  Log.severity = 0
  iii Xena.query(query_str, 'https://ucscpublic.xenahubs.net/')
  raise "STOP"
  file = "GTEx_Analysis_v6_RNA-seq_RNA-SeQCv1.1.8_gene_rpkm_log.gz"
  Log.tsv Xena.tsv(file)
end
109
+
@@ -259,4 +259,20 @@ module Organism
259
259
 
260
260
  Misc.total_length(exon_ranges)
261
261
  end
262
+
263
# Map each chromosome of an organism to the byte size of its sequence file.
#
# organism - Organism code used to look up the `chromosome_*` files.
#
# The chromosome name is the text after the last "_" of the file path, up to
# the first "." that follows it. Files reported as gzip or bgzip by Open are
# measured on their decompressed content (zcat piped into wc -c); any other
# file is measured with File.size.
#
# Returns a Hash of chromosome name => Integer size.
def self.chromosome_sizes(organism = Organism.default_code("Hsa"))
  Organism[organism].glob_all("chromosome_*").each_with_object({}) do |path, sizes|
    name = path.split("_").last.split(".").first

    compressed = Open.gzip?(path) || Open.bgzip?(path)
    bytes = compressed ? CMD.cmd("zcat '#{ path }' | wc -c ").read : File.size(path)

    sizes[name] = bytes.to_i
  end
end
262
278
  end
@@ -573,12 +573,13 @@ end
573
573
  #{{{ Special files
574
574
  require 'bio'
575
575
 
576
- file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
576
+ file 'transcript_sequence' => ["exons", "transcript_exons", "blacklist_chromosomes"] do |t|
577
577
  exon_info = TSV.open('exons', :type => :list, :fields => ["Exon Strand", "Exon Chr Start", "Exon Chr End", "Chromosome Name"], :unnamed => true)
578
578
 
579
579
  chr_transcript_ranges ||= {}
580
580
  transcript_strand = {}
581
581
 
582
+ blacklist_chromosomes = Path.setup(File.expand_path('blacklist_chromosomes')).list
582
583
  transcript_exons = Path.setup(File.expand_path('transcript_exons'))
583
584
  TSV.traverse transcript_exons do |transcript,values|
584
585
  transcript = transcript.first if Array === transcript
@@ -604,7 +605,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
604
605
  transcript_sequence = {}
605
606
  chr_transcript_ranges.each do |chr, transcript_ranges|
606
607
  begin
607
- raise "LRG, GL, HG, NT, KI, and HSCHR chromosomes not supported: #{chr}" if chr =~ /(?:LRG_|GL0|HG|HSCHR|NT|KI)/
608
+ raise "LRG, GL, HG, NT, KI, and HSCHR chromosomes not supported: #{chr}" if blacklist_chromosomes.include? chr
608
609
  p = File.expand_path("./chromosome_#{chr}")
609
610
  Organism.root.annotate p
610
611
  p.sub!(%r{.*/organisms/},'share/organisms/')
@@ -66,6 +66,10 @@ class TestOrganism < Test::Unit::TestCase
66
66
  assert_equal ["ENSG00000133703"], Gene.setup("Kras", "Associated Gene Name", "Mmu/jun2011").ensembl.ortholog(Organism.default_code("Hsa"))
67
67
  end
68
68
 
69
# Chromosome "2" of the default organism must be reported as larger than
# ten million bytes.
def test_chr_sizes
  chr2_size = Organism.chromosome_sizes["2"].to_i
  assert chr2_size > 10_000_000
end
72
+
69
73
  #def test_genes_at_chromosome
70
74
  # pos = [12, 117799500]
71
75
  # assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.30
4
+ version: 3.1.32
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-14 00:00:00.000000000 Z
11
+ date: 2018-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -89,6 +89,7 @@ files:
89
89
  - etc/allowed_biomart_archives
90
90
  - etc/biomart/missing_in_archive
91
91
  - etc/organisms
92
+ - etc/xena_hubs
92
93
  - lib/rbbt/sources/CASCADE.rb
93
94
  - lib/rbbt/sources/COREAD_phospho_proteome.rb
94
95
  - lib/rbbt/sources/COSTART.rb
@@ -101,6 +102,7 @@ files:
101
102
  - lib/rbbt/sources/PRO.rb
102
103
  - lib/rbbt/sources/PSI_MI.rb
103
104
  - lib/rbbt/sources/STITCH.rb
105
+ - lib/rbbt/sources/Xena.rb
104
106
  - lib/rbbt/sources/array_express.rb
105
107
  - lib/rbbt/sources/barcode.rb
106
108
  - lib/rbbt/sources/bibtex.rb