rbbt-sources 3.1.51 → 3.1.52

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4cb92f1a2c300c6787870f06f5b4ea45967242a33e7c51cf94f002d1901000af
4
- data.tar.gz: 793c56312f02532861451142988b24463de6a30d460f6f7a7238889b91c9c336
3
+ metadata.gz: 19722279e8b56d1b0cb9e72001e11333e8e436d3a59bf746be3ed744afd047e1
4
+ data.tar.gz: 7eca716d5a2ee5ab6ba35547230d01466417f373df775fda110cae79366b56d5
5
5
  SHA512:
6
- metadata.gz: 95ab052b23bc28f919e9cf242e0a74e1e433335879d77fc38752925f74e5ec92b8b42b1c27f1fee170bb3125be1a04b83b24c7da524c46efce07241ff169bdff
7
- data.tar.gz: bbe1970028942398b1b84e314d704acb0fac405045785e45516e8dbd65927852874bcceb0393748fd68a507c6d4292fda579d1df16d7831d47b5b9ea0de6bf7d
6
+ metadata.gz: 9acfd6f4718444fed3891d431e49396d42293105767915375bf86d485e84ed58c5dbb81eff6509ced77e6ae15c38bbbcf91d52590aca6d53e0b3feb4db8c90b6
7
+ data.tar.gz: 572c173002d2bad704df755542448fc294071c0849bfbac997ce254001591fb463ea8244c167066953224b1e758108ef91417a10b22c02bd0991d819efc68453
@@ -1,9 +1,4 @@
1
1
  may2009
2
- may2012
3
- dec2013
4
2
  feb2014
5
- dec2014
6
- dec2015
7
- oct2016
8
3
  may2017
9
4
  apr2019
@@ -1,54 +1,95 @@
1
1
  require 'rbbt-util'
2
2
  require 'rbbt/resource'
3
+ require 'rbbt/sources/organism'
3
4
 
4
5
  module ClinVar
5
6
  extend Resource
6
7
  self.subdir = 'share/databases/ClinVar'
7
8
 
8
- def self.organism(org="Hsa")
9
- Organism.default_code(org)
9
+ def self.organism_hg19(org="Hsa")
10
+ Organism.organism_for_build("hg19")
10
11
  end
11
12
 
13
+ def self.organism_hg38(org="Hsa")
14
+ Organism.organism_for_build("hg38")
15
+ end
16
+
17
+
12
18
  ClinVar.claim ClinVar.variant_summary, :url, "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
13
19
 
14
- ClinVar.claim ClinVar.snv_summary, :proc do
15
- url = "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
16
- io = TSV.traverse ClinVar.variant_summary, :type => :array, :into => :stream do |line|
17
- line = Misc.fixutf8 line
20
+ ClinVar.claim ClinVar.hg19.snv_summary, :proc do
21
+ parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
22
+ dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg19
23
+ dumper.init
24
+ chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
25
+ TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
26
+ chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
27
+ next if assembly != "GRCh37"
28
+ pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
29
+ res = muts.collect{|m| [[chr, pos, m] * ":", values] }
30
+
31
+ res.extend MultipleResult
32
+
33
+ res
34
+ end
35
+ dumper.stream
36
+ end
37
+
38
+ ClinVar.claim ClinVar.hg38.snv_summary, :proc do
39
+ parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
40
+ dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg38
41
+ dumper.init
42
+ chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
43
+ TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
44
+ chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
45
+ next if assembly != "GRCh38"
46
+ pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
47
+ res = muts.collect{|m| [[chr, pos, m] * ":", values] }
48
+
49
+ res.extend MultipleResult
50
+
51
+ res
52
+ end
53
+ dumper.stream
54
+ end
55
+
56
+
57
+ ClinVar.claim ClinVar.hg19.mi_summary, :proc do
58
+ require 'rbbt/workflow'
59
+ Workflow.require_workflow "Sequence"
60
+ variants = ClinVar.hg19.snv_summary.produce
61
+ muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
62
+ consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19).clean.run(true)
63
+
64
+ options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
65
+ fields = options[:fields].length
66
+ dumper = TSV::Dumper.new options
67
+ dumper.init
68
+ pasted = TSV.paste_streams([variants, TSV.get_stream(consequence)])
69
+ TSV.traverse pasted, :into => dumper, :bar => true do |mutation,values|
18
70
  begin
71
+ mis = values[fields..-1].flatten
72
+ next if mis.empty?
19
73
  res = []
20
- if line =~ /^#/
21
- parts = line.split("\t")
22
- res << (["#Genomic Mutation"] + parts[1..12] + parts[15..23]) * "\t"
23
- else
24
- next unless line =~ /GRCh37/
25
- next if line =~ /(copy number|NT expansion|duplication|indel)/
26
- parts = line.split("\t")
27
- chr,pos,ref,mut = parts.values_at 13, 14, 25, 26
28
- next if ref == 'na' or mut == 'na'
29
-
30
- pos, muts = Misc.correct_mutation(pos.to_i,ref,mut)
31
- muts.each do |mut|
32
- mutation = [chr,pos,mut] * ":"
33
- res << ([mutation] + parts[1..12] + parts[15..23]) * "\t"
34
- end
35
- end
36
74
  res.extend MultipleResult
75
+ mis.each do |mi|
76
+ res << [mi, values[0..fields-1]]
77
+ end
37
78
  res
38
79
  rescue
39
80
  Log.exception $!
40
81
  raise $!
41
82
  end
42
83
  end
43
- Misc.sort_stream(io)
84
+ dumper.stream
44
85
  end
45
86
 
46
- ClinVar.claim ClinVar.mi_summary, :proc do
87
+ ClinVar.claim ClinVar.hg38.mi_summary, :proc do
47
88
  require 'rbbt/workflow'
48
89
  Workflow.require_workflow "Sequence"
49
- variants = ClinVar.snv_summary.produce
90
+ variants = ClinVar.hg38.snv_summary.produce
50
91
  muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
51
- consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true).clean.run(true)
92
+ consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg38).clean.run(true)
52
93
 
53
94
  options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
54
95
  fields = options[:fields].length
@@ -74,3 +115,10 @@ module ClinVar
74
115
  end
75
116
  end
76
117
 
118
+ if __FILE__ == $0
119
+ Log.severity = 0
120
+ ClinVar.hg19.snv_summary.produce
121
+ ClinVar.hg19.mi_summary.produce(true)
122
+ ClinVar.hg38.snv_summary.produce
123
+ ClinVar.hg38.mi_summary.produce(true)
124
+ end
@@ -46,6 +46,12 @@ module Organism
46
46
  Rbbt.share.install.Organism.find.glob('???').collect{|f| File.basename(f)}
47
47
  end
48
48
 
49
+ def self.prepared_organisms
50
+ Rbbt.share.organisms.glob_all("???/???????/{identifiers,chromosome_1,scientific_name}").collect{|f|
51
+ [File.basename(File.dirname(File.dirname(f))), File.basename(File.dirname(f))] * "/"
52
+ }.uniq
53
+ end
54
+
49
55
  def self.installable_organisms
50
56
  self.installed_organisms
51
57
  end
@@ -21,7 +21,7 @@ $biomart_lexicon = [
21
21
  $biomart_identifiers = [
22
22
  ['Entrez Gene ID', "entrezgene"],
23
23
  ['Ensembl Protein ID', "ensembl_peptide_id" ],
24
- ['Associated Gene Name' , "rgd_symbol"],
24
+ ['Associated Gene Name' , "external_gene_name"],
25
25
  ['Protein ID' , "protein_id"] ,
26
26
  ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
27
27
  ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
@@ -42,6 +42,14 @@ $biomart_identifiers = [
42
42
  #['Codelink ID ', "codelink"],
43
43
  ]
44
44
 
45
+ $biomart_protein_identifiers = [
46
+ [ 'Protein ID', "protein_id" ],
47
+ [ 'RefSeq Protein ID', "refseq_peptide" ],
48
+ [ 'Unigene ID', "unigene" ],
49
+ [ 'UniProt/SwissProt ID', "uniprot_swissprot"],
50
+ [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession"],
51
+ ]
52
+
45
53
  $namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
46
54
  Thread.current["namespace"] = File.basename(File.dirname(File.expand_path(__FILE__)))
47
55
  load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.51
4
+ version: 3.1.52
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-28 00:00:00.000000000 Z
11
+ date: 2021-04-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -176,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
176
176
  - !ruby/object:Gem::Version
177
177
  version: '0'
178
178
  requirements: []
179
- rubygems_version: 3.0.6
179
+ rubygems_version: 3.1.4
180
180
  signing_key:
181
181
  specification_version: 4
182
182
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)