rbbt-sources 3.1.46 → 3.1.51

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9cfdfc8f6fd6a11106d90cf39dc8f10165fd36128fe69a8ca4c72fcf878183c3
4
- data.tar.gz: f268c37d7c50b74fb67d2dae77bf25c93dbf1013f23a11e1ffeb0f024fdabdc0
3
+ metadata.gz: 4cb92f1a2c300c6787870f06f5b4ea45967242a33e7c51cf94f002d1901000af
4
+ data.tar.gz: 793c56312f02532861451142988b24463de6a30d460f6f7a7238889b91c9c336
5
5
  SHA512:
6
- metadata.gz: d6e29cb4355ad3307bf3ccbead098406e51216e963cd201fc11c17aa4aad263d184ca1ab1389c4940e32bf1d541471a9aac326b12055585b515f26c63592a025
7
- data.tar.gz: 4663061a1b3f8a80914468c8af3bb8b5da821739fe8943011881ba9b89405e34b40ec9b145e9a7f9fdf7b2cc7cdf8f294fa978b01c41b1f8693a4e73b186d65f
6
+ metadata.gz: 95ab052b23bc28f919e9cf242e0a74e1e433335879d77fc38752925f74e5ec92b8b42b1c27f1fee170bb3125be1a04b83b24c7da524c46efce07241ff169bdff
7
+ data.tar.gz: bbe1970028942398b1b84e314d704acb0fac405045785e45516e8dbd65927852874bcceb0393748fd68a507c6d4292fda579d1df16d7831d47b5b9ea0de6bf7d
@@ -1,3 +1,7 @@
1
+ ">oct2014":
2
+ - rgd~rgd_id
3
+ ">jun2019":
4
+ - entrezgene~entrezgene_id
1
5
  ">dec2017":
2
6
  - unigene
3
7
  ">dec2016":
@@ -134,11 +134,14 @@ module BioMart
134
134
  # cause an error if the BioMart WS does not allow filtering with that
135
135
  # attribute.
136
136
  def self.query(database, main, attrs = nil, filters = nil, data = nil, open_options = {})
137
+ IndiferentHash.setup(open_options)
137
138
  open_options = Misc.add_defaults open_options, :nocache => false, :filename => nil, :field_names => nil, :by_chr => false
138
139
  filename, field_names, by_chr = Misc.process_options open_options, :filename, :field_names, :by_chr
139
140
  attrs ||= []
140
141
  open_options = Misc.add_defaults open_options, :keep_empty => false, :merge => true
141
142
 
143
+ IndiferentHash.setup(open_options)
144
+
142
145
  Log.low "BioMart query: '#{main}' [#{(attrs || []) * ', '}] [#{(filters || []) * ', '}] #{open_options.inspect}"
143
146
 
144
147
  max_items = 2
@@ -167,6 +170,7 @@ module BioMart
167
170
  end
168
171
 
169
172
  open_options[:filename] = "BioMart[#{main}+#{attrs.length}]"
173
+
170
174
  if filename.nil?
171
175
  results = TSV.open data, open_options
172
176
  results.key_field = main
@@ -52,24 +52,35 @@ module Ensembl
52
52
  File.join("ftp://" + SERVER, ftp_directory_for(organism) )
53
53
  end
54
54
 
55
- def self.url_for(organism, table)
56
- "#{base_url(organism)}/#{table}.txt.gz.bz2"
55
+ def self.url_for(organism, table, extension)
56
+ File.join(base_url(organism), table) + ".#{extension}.gz"
57
+ end
58
+
59
+ def self._get_gz(url)
60
+ begin
61
+ CMD.cmd("wget '#{url}' -O - | gunzip").read
62
+ rescue
63
+ CMD.cmd("wget '#{url}.bz2' -O - | bunzip2 | gunzip").read
64
+ end
65
+ end
66
+
67
+ def self._get_file(organism, table, extension)
68
+ url = url_for(organism, table, extension)
69
+ self._get_gz(url)
57
70
  end
58
71
 
59
72
  def self.has_table?(organism, table)
60
- sql_file = CMD.cmd("wget '#{base_url(organism)}/#{File.basename(base_url(organism))}.sql.gz.bz2' -O -| bunzip2| gunzip").read
73
+ sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
61
74
  ! sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm).nil?
62
75
  end
63
76
 
64
77
  def self.fields_for(organism, table)
65
- sql_file = CMD.cmd("wget '#{base_url(organism)}/#{File.basename(base_url(organism))}.sql.gz.bz2' -O -| bunzip2| gunzip").read
66
-
78
+ sql_file = _get_file(organism, File.basename(base_url(organism)), 'sql')
67
79
  chunk = sql_file.match(/^CREATE TABLE .#{table}. \((.*?)^\)/sm)[1]
68
80
  chunk.scan(/^\s+`(.*?)`/).flatten
69
81
  end
70
82
 
71
83
  def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
72
- url = url_for(organism, table)
73
84
  if key_field and fields
74
85
  all_fields = fields_for(organism, table)
75
86
  key_pos = all_fields.index key_field
@@ -78,7 +89,8 @@ module Ensembl
78
89
  options[:key_field] = key_pos
79
90
  options[:fields] = field_pos
80
91
  end
81
- tsv = TSV.open(CMD.cmd("wget '#{url}' -O - |bunzip2|gunzip", :pipe => true), options)
92
+
93
+ tsv = TSV.open(StringIO.new(_get_file(organism, table, "txt")), options)
82
94
  tsv.key_field = key_field
83
95
  tsv.fields = fields
84
96
  tsv
@@ -88,8 +88,10 @@ module Organism
88
88
  end
89
89
  when "Mmu"
90
90
  "mm10"
91
+ when "Rno"
92
+ "rn6"
91
93
  else
92
- raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
94
+ raise "Only organism 'Hsa' (Homo sapiens), 'Rno' (Rattus norvegicus), and Mmu (Mus musculus) supported"
93
95
  end
94
96
  end
95
97
 
@@ -117,6 +119,8 @@ module Organism
117
119
  end
118
120
  when "Mmu"
119
121
  "GRCm38"
122
+ when "Rno"
123
+ "Rnor_6.0"
120
124
  else
121
125
  raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
122
126
  end
@@ -145,12 +149,12 @@ module Organism
145
149
  new_positions = {}
146
150
 
147
151
  TmpFile.with_file(positions_bed) do |source_bed|
148
- TmpFile.with_file() do |unmapped_file|
149
- TmpFile.with_file() do |map_file|
152
+ TmpFile.with_file do |unmapped_file|
153
+ TmpFile.with_file do |map_file|
150
154
 
151
155
 
152
156
  Open.write(map_file, Open.read(map_url))
153
- new_mutations = TmpFile.with_file() do |target_bed|
157
+ new_mutations = TmpFile.with_file do |target_bed|
154
158
  FileUtils.chmod(755, Rbbt.software.opt.bin.liftOver.produce.find)
155
159
  CMD.cmd("#{Rbbt.software.opt.bin.liftOver.find} '#{source_bed}' '#{map_file}' '#{target_bed}' '#{unmapped_file}'").read
156
160
  Open.read(target_bed) do |line|
@@ -203,12 +203,19 @@ module UniProt
203
203
  end
204
204
  value = part.gsub("\nFT", '').gsub(/\s+/, ' ')
205
205
  case
206
+ when value.match(/(\d+)\.\.(\d+) (.*)/)
207
+ start, eend, description = $1, $2, $3
208
+ description.gsub(/^FT\s+/m, '')
206
209
  when value.match(/(\d+) (\d+) (.*)/)
207
210
  start, eend, description = $1, $2, $3
208
211
  description.gsub(/^FT\s+/m, '')
209
212
  when value.match(/^\s+(\d+) (\d+)/)
210
213
  start, eend = $1, $2
211
214
  description = nil
215
+ when value.match(/(\d+) (.*)/)
216
+ start, description = $1, $2
217
+ eend = start
218
+ description.gsub(/^FT\s+/m, '')
212
219
  else
213
220
  Log.debug "Value not understood: #{ value }"
214
221
  end
@@ -1,5 +1,10 @@
1
1
  #: :type=:single
2
2
  #Release build
3
+ release-100 apr2020
4
+ release-99 jan2020
5
+ release-98 sep2019
6
+ release-97 jul2019
7
+ release-96 apr2019
3
8
  release-95 jan2019
4
9
  release-94 oct2018
5
10
  release-93 jul2018
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [9606]
7
7
  $scientific_name = "Homo sapiens"
8
- $ortholog_key = "human_ensembl_gene"
8
+ $ortholog_key = "hsapiens_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'hsapiens_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'hsapiens_snp'
@@ -5,7 +5,7 @@ require File.join(File.dirname(__FILE__), '../../lib/helpers')
5
5
 
6
6
  $taxs = [10090]
7
7
  $scientific_name = "Mus musculus"
8
- $ortholog_key = "mouse_ensembl_gene"
8
+ $ortholog_key = "mmusculus_homolog_ensembl_gene"
9
9
 
10
10
  $biomart_db = 'mmusculus_gene_ensembl'
11
11
  $biomart_db_germline_variation = 'mmusculus_snp'
@@ -9,7 +9,7 @@ $scientific_name = "Rattus norvegicus"
9
9
  $biomart_db = 'rnorvegicus_gene_ensembl'
10
10
  $biomart_db_germline_variation = 'rnorvegicus_snp'
11
11
  $biomart_db_somatic_variation = 'rnorvegicus_snp_som'
12
- $ortholog_key = "rat_ensembl_gene"
12
+ $ortholog_key = "rnorvegicus_homolog_ensembl_gene"
13
13
 
14
14
  $biomart_lexicon = [
15
15
  [ 'Associated Gene Name' , "external_gene_id"],
@@ -20,6 +20,7 @@ $biomart_lexicon = [
20
20
 
21
21
  $biomart_identifiers = [
22
22
  ['Entrez Gene ID', "entrezgene"],
23
+ ['Ensembl Protein ID', "ensembl_peptide_id" ],
23
24
  ['Associated Gene Name' , "rgd_symbol"],
24
25
  ['Protein ID' , "protein_id"] ,
25
26
  ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
@@ -547,13 +547,13 @@ end
547
547
  rule /^possible_ortholog_(.*)/ do |t|
548
548
  other = t.name.match(/ortholog_(.*)/)[1]
549
549
  other_key = Organism.ortholog_key(other).produce.read
550
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
550
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", "inter_paralog_" + other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
551
551
  end
552
552
 
553
553
  rule /^ortholog_(.*)/ do |t|
554
554
  other = t.name.match(/ortholog_(.*)/)[1]
555
555
  other_key = Organism.ortholog_key(other).produce.read
556
- BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :flat, :filename => t.name, :namespace => Thread.current['namespace'])
556
+ BioMart.tsv($biomart_db, $biomart_ensembl_gene, [["Ortholog Ensembl Gene ID", other_key]], [], nil, :keep_empty => false, :type => :double, :filename => t.name, :namespace => Thread.current['namespace'])
557
557
  end
558
558
 
559
559
  rule /[a-z]{3}[0-9]{4}\/.*/i do |t|
@@ -728,13 +728,18 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
728
728
  transcript_sequence.through do |transcript, sequence|
729
729
  protein = transcript_protein[transcript]
730
730
  next if protein.nil? or protein.empty?
731
+
731
732
  utr5 = transcript_5utr[transcript]
732
733
  utr3 = transcript_3utr[transcript]
733
734
  phase = transcript_phase[transcript] || 0
735
+
734
736
  if phase < 0
735
- utr5 = - phase if utr5 == 0
737
+ if utr5.nil? || utr5 == 0 || utr5 == "0"
738
+ utr5 = 0
739
+ end
736
740
  phase = 0
737
741
  end
742
+
738
743
  psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
739
744
  protein_sequence[protein]=psequence
740
745
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.46
4
+ version: 3.1.51
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-14 00:00:00.000000000 Z
11
+ date: 2021-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util