rbbt-sources 3.1.45 → 3.1.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ecb6a979911fd4e4a4988ab9abee186cb181eab335b41505d71e3405f1cac454
4
- data.tar.gz: 5b6ba6e080cff6d6bd864dc468f418b1d74d9ee6fe916df0b9edc2a8ba2e5471
3
+ metadata.gz: 76650eb77129f5a072416944c24e3155e507ffe75f79372c505d62b919899e48
4
+ data.tar.gz: 70bf96eb1fa599f121cf65f40388aae545c4a6bb03ca006bc24f4ec83c78175d
5
5
  SHA512:
6
- metadata.gz: 72cc59fa3439ffbb4b850723c4b6c5403f65301a70ddf61f6672e52646ee1e90644ac125234cff1a51702a100d7ea2731940cfb33a49d0c3531b5fd8877d1d24
7
- data.tar.gz: fdf336019c660cf2fb0eea5af696edae0dbb3d31adf7c063060626c44d2ecf6864f1c0dd614479a1166583ad575cb288ce50e4291f90781f26d06568d296c3e7
6
+ metadata.gz: dc1fc3dd34681ee50feaff3a447b98abf484eb5ed2775804e6aa2268e14dede03e16970ed7c5bfe5c5015b081514cb3309f178cbca973434848f83368ba27778
7
+ data.tar.gz: 1ce0587059951ee335cf08f3cf22ceb24c0b0deaaf9798689be141a53871934e1e679efbf2faf578fb10bf3088386539204cecc121a68becddedcdb0aad988c5
@@ -1,3 +1,7 @@
1
+ ">oct2014":
2
+ - rgd~rgd_id
3
+ ">jun2019":
4
+ - entrezgene~entrezgene_id
1
5
  ">dec2017":
2
6
  - unigene
3
7
  ">dec2016":
@@ -134,11 +134,14 @@ module BioMart
134
134
  # cause an error if the BioMart WS does not allow filtering with that
135
135
  # attribute.
136
136
  def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
137
+ IndiferentHash.setup(open_options)
137
138
  open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
138
139
  filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
139
140
  attrs ||= []
140
141
  open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
141
142
 
143
+ IndiferentHash.setup(open_options)
144
+
142
145
  Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
143
146
 
144
147
  max_items = 2
@@ -167,6 +170,7 @@ module BioMart
167
170
  end
168
171
 
169
172
  open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
173
+
170
174
  if filename.nil?
171
175
  results = TSV.open data, open_options
172
176
  results.key_field = main
@@ -52,24 +52,35 @@ module Ensembl
52
52
  File.join("ftp://" + SERVER, ftp_directory_for(organism) )
53
53
  end
54
54
 
55
- def self.url_for(organism, table)
56
- "#{base_url(organism)}/#{table}.txt.gz.bz2"
55
+ def self.url_for(organism, table, extension)
56
+ File.join(base_url(organism), table) + ".#{extension}.gz"
57
+ end
58
+
59
+ def self._get_gz(url)
60
+ begin
61
+ CMD.cmd("wget '#{url}' -O - | gunzip").read
62
+ rescue
63
+ CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
64
+ end
65
+ end
66
+
67
+ def self._get_file(organism, table, extension)
68
+ url = url_for(organism, table, extension)
69
+ self._get_gz(url)
57
70
  end
58
71
 
59
72
  def self.has_table?(organism, table)
60
- sql_file = CMD.cmd("wget '#{base_url(organism)}/#{File.basename(base_url(organism))}.sql.gz.bz2' -O -| bunzip2| gunzip").read
73
+ sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
61
74
  ! sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm).nil?
62
75
  end
63
76
 
64
77
  def self.fields_for(organism, table)
65
- sql_file = CMD.cmd("wget '#{base_url(organism)}/#{File.basename(base_url(organism))}.sql.gz.bz2' -O -| bunzip2| gunzip").read
66
-
78
+ sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
67
79
  chunk = sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm)[1]
68
80
  chunk.scan(/^\s+`(.*?)`/).flatten
69
81
  end
70
82
 
71
83
  def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
72
- url = url_for(organism, table)
73
84
  if key_field and fields
74
85
  all_fields = fields_for(organism, table)
75
86
  key_pos = all_fields.index key_field
@@ -78,7 +89,8 @@ module Ensembl
78
89
  options[:key_field] = key_pos
79
90
  options[:fields] = field_pos
80
91
  end
81
- tsv = TSV.open(CMD.cmd("wget '#{url}' -O - |bunzip2|gunzip", :pipe => true), options)
92
+
93
+ tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
82
94
  tsv.key_field = key_field
83
95
  tsv.fields = fields
84
96
  tsv
@@ -88,8 +88,10 @@ module Organism
88
88
  end
89
89
  when "Mmu"
90
90
  "mm10"
91
+ when "Rno"
92
+ "rn6"
91
93
  else
92
- raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
94
+ raise "Only organism 'Hsa' (Homo sapiens), 'Rno' (Rattus norvegicus), and Mmu (Mus musculus) supported"
93
95
  end
94
96
  end
95
97
 
@@ -117,6 +119,8 @@ module Organism
117
119
  end
118
120
  when "Mmu"
119
121
  "GRCm38"
122
+ when "Rno"
123
+ "Rnor_6.0"
120
124
  else
121
125
  raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
122
126
  end
@@ -145,12 +149,12 @@ module Organism
145
149
  new_positions = {}
146
150
 
147
151
  TmpFile.with_file(positions_bed) do |source_bed|
148
- TmpFile.with_file() do |unmapped_file|
149
- TmpFile.with_file() do |map_file|
152
+ TmpFile.with_file do |unmapped_file|
153
+ TmpFile.with_file do |map_file|
150
154
 
151
155
 
152
156
  Open.write(map_file, Open.read(map_url))
153
- new_mutations = TmpFile.with_file() do |target_bed|
157
+ new_mutations = TmpFile.with_file do |target_bed|
154
158
  FileUtils.chmod(755, Rbbt.software.opt.bin.liftOver.produce.find)
155
159
  CMD.cmd("#{Rbbt.software.opt.bin.liftOver.find} '#{source_bed}' '#{map_file}' '#{target_bed}' '#{unmapped_file}'").read
156
160
  Open.read(target_bed) do |line|
@@ -2,15 +2,15 @@ require 'rbbt'
2
2
  require 'rbbt/tsv'
3
3
  require 'rbbt/resource'
4
4
 
5
- module TFacts
5
+ module TFactS
6
6
  extend Resource
7
- self.subdir = "share/databases/TFacts"
7
+ self.subdir = "share/databases/TFactS"
8
8
 
9
- TFacts.claim TFacts[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
9
+ TFactS.claim TFactS[".source"]["Catalogues.xls"], :url, "http://www.tfacts.org/TFactS-new/TFactS-v2/tfacts/data/Catalogues.xls"
10
10
 
11
- TFacts.claim TFacts.targets, :proc do
11
+ TFactS.claim TFactS.targets, :proc do
12
12
  require 'spreadsheet'
13
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
13
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
14
14
  sheet = book.worksheet 0
15
15
 
16
16
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)"], :namespace => "Hsa", :type => :flat)
@@ -24,9 +24,9 @@ module TFacts
24
24
  tsv.to_s
25
25
  end
26
26
 
27
- TFacts.claim TFacts.targets_signed, :proc do
27
+ TFactS.claim TFactS.targets_signed, :proc do
28
28
  require 'spreadsheet'
29
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
29
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
30
30
  sheet = book.worksheet 1
31
31
 
32
32
  tsv = TSV.setup({}, :key_field => "Target Gene (Associated Gene Name)", :fields => ["Transcription Factor (Associated Gene Name)", "Sign", "PMID"], :namespace => "Hsa", :type => :double)
@@ -43,13 +43,13 @@ module TFacts
43
43
  tsv.to_s
44
44
  end
45
45
 
46
- TFacts.claim TFacts.regulators, :proc do
47
- TFacts.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
46
+ TFactS.claim TFactS.regulators, :proc do
47
+ TFactS.targets.tsv.reorder("Transcription Factor (Associated Gene Name)").to_s
48
48
  end
49
49
 
50
- TFacts.claim TFacts.tf_tg, :proc do
50
+ TFactS.claim TFactS.tf_tg, :proc do
51
51
  require 'spreadsheet'
52
- book = Spreadsheet.open TFacts[".source"]["Catalogues.xls"].produce.find
52
+ book = Spreadsheet.open TFactS[".source"]["Catalogues.xls"].produce.find
53
53
 
54
54
  tsv = TSV.setup({}, :key_field => "Transcription Factor (Associated Gene Name)", :fields => ["Target Gene (Associated Gene Name)", "Sign", "Species", "Source", "PMID"], :namespace => "Hsa", :type => :double)
55
55
 
@@ -128,16 +128,16 @@ if defined? Entity and defined? Gene and Entity === Gene
128
128
 
129
129
  module Gene
130
130
  property :is_transcription_factor? => :array2single do
131
- tfs = TFacts.targets.keys
131
+ tfs = TFactS.targets.keys
132
132
  self.name.collect{|gene| tfs.include? gene}
133
133
  end
134
134
 
135
135
  property :transcription_regulators => :array2single do
136
- Gene.setup(TFacts.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
136
+ Gene.setup(TFactS.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
137
137
  end
138
138
 
139
139
  property :transcription_targets => :array2single do
140
- Gene.setup(TFacts.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
140
+ Gene.setup(TFactS.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
141
141
  end
142
142
  end
143
143
  end
@@ -1,5 +1,10 @@
1
1
  #: :type=:single
2
2
  #Release build
3
+ release-100 apr2020
4
+ release-99 jan2020
5
+ release-98 sep2019
6
+ release-97 jul2019
7
+ release-96 apr2019
3
8
  release-95 jan2019
4
9
  release-94 oct2018
5
10
  release-93 jul2018
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [9606]
7
7
  $scientific_name = "Homo sapiens"
8
- $ortholog_key = "human_ensembl_gene"
8
+ $ortholog_key = "hsapiens_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'hsapiens_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'hsapiens_snp'
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [10090]
7
7
  $scientific_name = "Mus musculus"
8
- $ortholog_key = "mouse_ensembl_gene"
8
+ $ortholog_key = "mmusculus_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'mmusculus_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'mmusculus_snp'
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
9
9
  $biomart_db = 'rnorvegicus_gene_ensembl'
10
10
  $biomart_db_germline_variation = 'rnorvegicus_snp'
11
11
  $biomart_db_somatic_variation = 'rnorvegicus_snp_som'
12
- $ortholog_key = "rat_ensembl_gene"
12
+ $ortholog_key = "rnorvegicus_homolog_ensembl_gene"
13
13
 
14
14
  $biomart_lexicon = [
15
15
  [ 'Associated Gene Name' , "external_gene_id"],
@@ -20,6 +20,7 @@ $biomart_lexicon = [
20
20
 
21
21
  $biomart_identifiers = [
22
22
  ['Entrez Gene ID', "entrezgene"],
23
+ ['Ensembl Protein ID', "ensembl_peptide_id" ],
23
24
  ['Associated Gene Name' , "rgd_symbol"],
24
25
  ['Protein ID' , "protein_id"] ,
25
26
  ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
@@ -547,13 +547,13 @@ end
547
547
  rule /^possible_ortholog_(.*)/ do |t|
548
548
  other = t.name.match(/ortholog_(.*)/)[1]
549
549
  other_key = Organism.ortholog_key(other).produce.read
550
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
550
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
551
551
  end
552
552
 
553
553
  rule /^ortholog_(.*)/ do |t|
554
554
  other = t.name.match(/ortholog_(.*)/)[1]
555
555
  other_key = Organism.ortholog_key(other).produce.read
556
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
556
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
557
557
  end
558
558
 
559
559
  rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
@@ -728,13 +728,18 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
728
728
  transcript_sequence.through do |transcript, sequence|
729
729
  protein = transcript_protein[transcript]
730
730
  next if protein.nil? or protein.empty?
731
+
731
732
  utr5 = transcript_5utr[transcript]
732
733
  utr3 = transcript_3utr[transcript]
733
734
  phase = transcript_phase[transcript] || 0
735
+
734
736
  if phase < 0
735
- utr5 = - phase if utr5 == 0
737
+ if utr5.nil? || utr5 == 0 || utr5 == "0"
738
+ utr5 = 0
739
+ end
736
740
  phase = 0
737
741
  end
742
+
738
743
  psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
739
744
  protein_sequence[protein]=psequence
740
745
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.45
4
+ version: 3.1.50
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-13 00:00:00.000000000 Z
11
+ date: 2020-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util