rbbt-sources 3.1.30 → 3.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7da6b2938f54788ddcd4ee03b78e8e8b237bf478
4
- data.tar.gz: fc847e1ddf84cb477b4f07ac8b33a393009e72ac
3
+ metadata.gz: 0c4f4b556028ce477969e38d0b4a1d8a05b01305
4
+ data.tar.gz: 208e92cf212d96dc3333000d350e598ec45f6361
5
5
  SHA512:
6
- metadata.gz: d2a2261e40edd858ca483b269d409a95e48f61acfeb07c9b31e2381e5bcffa619c06c6c5afaa4df05ba91405638bbaa94897722c66ef1610344b542746c2aa61
7
- data.tar.gz: 7bd00cb785efd5db78ba14607436e42465055da71119d27741f20b7c7de4b791b90c321aaecc1de1d681c9d5ba75823401d03fd5da247d7a187dec56a27c4fd3
6
+ metadata.gz: 1f3e8855b0a233a6d97b80193a92fd90c45e1bfefb0f5b074a4aad2ec7151d9de1dd1c1eae842a84a9f15aeb436af4b97330845ec4ee8defe03735e65de1037f
7
+ data.tar.gz: a3f529915268806ac18920cfea3786ec2ab906b8492f3f8d6fd7786480a07bf1b17d6b4f5963a6175a1ff231a1697e53099fda66628e5d1fea9f7d525d7cf44e
data/etc/xena_hubs ADDED
@@ -0,0 +1,2 @@
1
+ UCSC public hub (not in my data hubs): https://ucscpublic.xenahubs.net
2
+ TCGA hub: https://tcga.xenahubs.net
@@ -0,0 +1,109 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+ require 'rest-client'
4
+
5
# Resource module for querying Xena hubs: it POSTs query text to a hub's
# `data/` endpoint and opens files from a hub's `download/` area as TSVs.
module Xena
  extend Resource
  self.subdir = 'share/databases/Xena'

  DEFAULT_XENA_HUB = "https://ucscpublic.xenahubs.net"
  TCGA_HUB = "https://tcga.xenahubs.net/"
  #def self.organism(org="Hsa")
  #  Organism.default_code(org)
  #end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # POSTs +query+ as a text/plain body to the hub's `data/` endpoint and
  # returns the response body parsed with JSON.parse.
  #
  # query - String holding the query text.
  # hub   - Base URL of the hub; a trailing slash is optional because the
  #         endpoint is assembled with File.join.
  #
  # The HTTP status is not inspected: a body that is not valid JSON surfaces
  # as the exception raised by JSON.parse.
  def self.query(query, hub = DEFAULT_XENA_HUB)
    uri = URI.parse(File.join(hub, 'data/'))

    http = Net::HTTP.new(uri.host, uri.port)
    # Only negotiate TLS when the hub URL is https, so that plain-http hubs
    # can be queried as well.
    http.use_ssl = uri.scheme == 'https'

    # String header key: Net::HTTP header names are strings.
    request = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'text/plain')
    request.body = query

    JSON.parse(http.request(request).body)
  end

  # Opens `<hub>/download/<file>` with TSV.open and returns the result.
  #
  # file - Name of the file below the hub's `download/` area.
  # hub  - Base URL of the hub; a trailing slash is optional.
  def self.tsv(file, hub = DEFAULT_XENA_HUB)
    # File.join avoids the doubled slash that plain interpolation produces for
    # hubs written with a trailing slash (such as TCGA_HUB).
    url = File.join(hub, 'download', file)
    TSV.open(url, :monitor => true, :header_hash => '')
  end

  # Queries +hub+ for the distinct values of the dataset `cohort` column,
  # with "(unassigned)" substituted for null, and returns the parsed response.
  def self.cohorts(hub = DEFAULT_XENA_HUB)
    query =<<-EOF
(;allCohorts
  (fn []
    (map :cohort
      (query
        {:select [[#sql/call [:distinct #sql/call [:ifnull :cohort "(unassigned)"]] :cohort]]
         :from [:dataset]})))
)
    EOF

    self.query query, hub
  end

  # Queries +hub+ for the datasets whose cohort is +cohort+ and returns the
  # parsed response. The query selects name, longtitle, count, type,
  # datasubtype, probemap, text, status and pmtext for each dataset.
  #
  # cohort - Cohort name; it is embedded in the query as a quoted string.
  # hub    - Base URL of the hub.
  def self.cohort_dataset(cohort, hub = DEFAULT_XENA_HUB)
    # Backslash-escape quotes and backslashes so the name cannot terminate the
    # string literal it is embedded in.
    escaped_cohort = cohort.to_s.gsub(/["\\]/) { |char| "\\#{char}" }

    query =<<-EOF
(; datasetList
  (fn [cohorts]
    (let [count-table {:select [[:dataset.name :dname] [:%count.value :count]]
                       :from [:dataset]
                       :join [:field [:= :dataset.id :dataset_id]
                              :code [:= :field.id :field_id]]
                       :group-by [:dataset.name]
                       :where [:= :field.name "sampleID"]}]
      (query {:select [:d.name :d.longtitle :count :d.type :d.datasubtype :d.probemap :d.text :d.status [:pm-dataset.text :pmtext]]
              :from [[:dataset :d]]
              :left-join [[:dataset :pm-dataset] [:= :pm-dataset.name :d.probemap]
                          count-table [:= :dname :d.name]]
              :where [:in :d.cohort cohorts]})
  ))
  ["#{ escaped_cohort }"])
    EOF

    self.query(query, hub)
  end


  # Claims the `data` path with a :proc producer; the block is empty, so
  # nothing is generated here.
  Xena.claim Xena.data, :proc do
  end
end
78
+
79
# When this file is executed directly, print every cohort reported by the TCGA
# hub followed by the name, title and type of each of its datasets.
if __FILE__ == $0

  Xena.cohorts(Xena::TCGA_HUB).each do |cohort|
    puts Log.color(:magenta, cohort)
    Xena.cohort_dataset(cohort, Xena::TCGA_HUB).each do |info|
      puts Log.color(:blue, "Name: ") + Log.color(:yellow, info["name"].to_s)
      puts Log.color(:blue, "Title: ") + info["longtitle"].to_s
      puts Log.color(:blue, "Type: ") + info["type"].to_s
      puts
    end
    puts
  end

end
109
+
@@ -259,4 +259,20 @@ module Organism
259
259
 
260
260
  Misc.total_length(exon_ranges)
261
261
  end
262
+
263
# Builds a Hash mapping chromosome name to the size of its "chromosome_*" file
# for +organism+.
#
# organism - Organism code; defaults to Organism.default_code("Hsa").
#
# For files that Open reports as gzip or bgzip the size is the decompressed
# byte count (zcat piped into wc -c); otherwise it is File.size. Sizes are
# Integers. When several files yield the same chromosome name, the last one
# globbed wins.
def self.chromosome_sizes(organism = Organism.default_code("Hsa"))
  # stdlib; required here so the path handed to the shell can be escaped
  require 'shellwords'

  Organism[organism].glob_all("chromosome_*").each_with_object({}) do |file, sizes|
    # The name is whatever follows the last "_" up to the first ".".
    # NOTE(review): chromosome names that themselves contain "_" or "." are
    # truncated by this parse - confirm that is acceptable.
    chromosome = file.split("_").last.split(".").first

    size = if Open.gzip?(file) || Open.bgzip?(file)
             # Escape the path instead of wrapping it in single quotes, which
             # breaks on paths that contain a quote.
             CMD.cmd("zcat #{Shellwords.escape(file.to_s)} | wc -c ").read
           else
             File.size(file)
           end

    sizes[chromosome] = size.to_i
  end
end
262
278
  end
@@ -573,12 +573,13 @@ end
573
573
  #{{{ Special files
574
574
  require 'bio'
575
575
 
576
- file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
576
+ file 'transcript_sequence' => ["exons", "transcript_exons", "blacklist_chromosomes"] do |t|
577
577
  exon_info = TSV.open('exons', :type => :list, :fields => ["Exon Strand", "Exon Chr Start", "Exon Chr End", "Chromosome Name"], :unnamed => true)
578
578
 
579
579
  chr_transcript_ranges ||= {}
580
580
  transcript_strand = {}
581
581
 
582
+ blacklist_chromosomes = Path.setup(File.expand_path('blacklist_chromosomes')).list
582
583
  transcript_exons = Path.setup(File.expand_path('transcript_exons'))
583
584
  TSV.traverse transcript_exons do |transcript,values|
584
585
  transcript = transcript.first if Array === transcript
@@ -604,7 +605,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
604
605
  transcript_sequence = {}
605
606
  chr_transcript_ranges.each do |chr, transcript_ranges|
606
607
  begin
607
- raise "LRG, GL, HG, NT, KI, and HSCHR chromosomes not supported: #{chr}" if chr =~ /(?:LRG_|GL0|HG|HSCHR|NT|KI)/
608
+ raise "LRG, GL, HG, NT, KI, and HSCHR chromosomes not supported: #{chr}" if blacklist_chromosomes.include? chr
608
609
  p = File.expand_path("./chromosome_#{chr}")
609
610
  Organism.root.annotate p
610
611
  p.sub!(%r{.*/organisms/},'share/organisms/')
@@ -66,6 +66,10 @@ class TestOrganism < Test::Unit::TestCase
66
66
  assert_equal ["ENSG00000133703"], Gene.setup("Kras", "Associated Gene Name", "Mmu/jun2011").ensembl.ortholog(Organism.default_code("Hsa"))
67
67
  end
68
68
 
69
# Organism.chromosome_sizes, called with its default organism, must report a
# size above 10,000,000 for chromosome "2".
def test_chr_sizes
  chr2_size = Organism.chromosome_sizes["2"].to_i
  assert(chr2_size > 10_000_000)
end
72
+
69
73
  #def test_genes_at_chromosome
70
74
  # pos = [12, 117799500]
71
75
  # assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.30
4
+ version: 3.1.32
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-05-14 00:00:00.000000000 Z
11
+ date: 2018-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -89,6 +89,7 @@ files:
89
89
  - etc/allowed_biomart_archives
90
90
  - etc/biomart/missing_in_archive
91
91
  - etc/organisms
92
+ - etc/xena_hubs
92
93
  - lib/rbbt/sources/CASCADE.rb
93
94
  - lib/rbbt/sources/COREAD_phospho_proteome.rb
94
95
  - lib/rbbt/sources/COSTART.rb
@@ -101,6 +102,7 @@ files:
101
102
  - lib/rbbt/sources/PRO.rb
102
103
  - lib/rbbt/sources/PSI_MI.rb
103
104
  - lib/rbbt/sources/STITCH.rb
105
+ - lib/rbbt/sources/Xena.rb
104
106
  - lib/rbbt/sources/array_express.rb
105
107
  - lib/rbbt/sources/barcode.rb
106
108
  - lib/rbbt/sources/bibtex.rb