rbbt-sources 3.1.51 → 3.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4cb92f1a2c300c6787870f06f5b4ea45967242a33e7c51cf94f002d1901000af
4
- data.tar.gz: 793c56312f02532861451142988b24463de6a30d460f6f7a7238889b91c9c336
3
+ metadata.gz: 19722279e8b56d1b0cb9e72001e11333e8e436d3a59bf746be3ed744afd047e1
4
+ data.tar.gz: 7eca716d5a2ee5ab6ba35547230d01466417f373df775fda110cae79366b56d5
5
5
  SHA512:
6
- metadata.gz: 95ab052b23bc28f919e9cf242e0a74e1e433335879d77fc38752925f74e5ec92b8b42b1c27f1fee170bb3125be1a04b83b24c7da524c46efce07241ff169bdff
7
- data.tar.gz: bbe1970028942398b1b84e314d704acb0fac405045785e45516e8dbd65927852874bcceb0393748fd68a507c6d4292fda579d1df16d7831d47b5b9ea0de6bf7d
6
+ metadata.gz: 9acfd6f4718444fed3891d431e49396d42293105767915375bf86d485e84ed58c5dbb81eff6509ced77e6ae15c38bbbcf91d52590aca6d53e0b3feb4db8c90b6
7
+ data.tar.gz: 572c173002d2bad704df755542448fc294071c0849bfbac997ce254001591fb463ea8244c167066953224b1e758108ef91417a10b22c02bd0991d819efc68453
@@ -1,9 +1,4 @@
1
1
  may2009
2
- may2012
3
- dec2013
4
2
  feb2014
5
- dec2014
6
- dec2015
7
- oct2016
8
3
  may2017
9
4
  apr2019
@@ -1,54 +1,95 @@
1
1
  require 'rbbt-util'
2
2
  require 'rbbt/resource'
3
+ require 'rbbt/sources/organism'
3
4
 
4
5
  module ClinVar
5
6
  extend Resource
6
7
  self.subdir = 'share/databases/ClinVar'
7
8
 
8
- def self.organism(org="Hsa")
9
- Organism.default_code(org)
9
+ def self.organism_hg19(org="Hsa")
10
+ Organism.organism_for_build("hg19")
10
11
  end
11
12
 
13
+ def self.organism_hg38(org="Hsa")
14
+ Organism.organism_for_build("hg38")
15
+ end
16
+
17
+
12
18
  ClinVar.claim ClinVar.variant_summary, :url, "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
13
19
 
14
- ClinVar.claim ClinVar.snv_summary, :proc do
15
- url = "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
16
- io = TSV.traverse ClinVar.variant_summary, :type => :array, :into => :stream do |line|
17
- line = Misc.fixutf8 line
20
+ ClinVar.claim ClinVar.hg19.snv_summary, :proc do
21
+ parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
22
+ dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg19
23
+ dumper.init
24
+ chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
25
+ TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
26
+ chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
27
+ next if assembly != "GRCh37"
28
+ pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
29
+ res = muts.collect{|m| [[chr, pos, m] * ":", values] }
30
+
31
+ res.extend MultipleResult
32
+
33
+ res
34
+ end
35
+ dumper.stream
36
+ end
37
+
38
+ ClinVar.claim ClinVar.hg38.snv_summary, :proc do
39
+ parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
40
+ dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg38
41
+ dumper.init
42
+ chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
43
+ TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
44
+ chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
45
+ next if assembly != "GRCh38"
46
+ pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
47
+ res = muts.collect{|m| [[chr, pos, m] * ":", values] }
48
+
49
+ res.extend MultipleResult
50
+
51
+ res
52
+ end
53
+ dumper.stream
54
+ end
55
+
56
+
57
+ ClinVar.claim ClinVar.hg19.mi_summary, :proc do
58
+ require 'rbbt/workflow'
59
+ Workflow.require_workflow "Sequence"
60
+ variants = ClinVar.hg19.snv_summary.produce
61
+ muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
62
+ consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19).clean.run(true)
63
+
64
+ options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
65
+ fields = options[:fields].length
66
+ dumper = TSV::Dumper.new options
67
+ dumper.init
68
+ pasted = TSV.paste_streams([variants, TSV.get_stream(consequence)])
69
+ TSV.traverse pasted, :into => dumper, :bar => true do |mutation,values|
18
70
  begin
71
+ mis = values[fields..-1].flatten
72
+ next if mis.empty?
19
73
  res = []
20
- if line =~ /^#/
21
- parts = line.split("\t")
22
- res << (["#Genomic Mutation"] + parts[1..12] + parts[15..23]) * "\t"
23
- else
24
- next unless line =~ /GRCh37/
25
- next if line =~ /(copy number|NT expansion|duplication|indel)/
26
- parts = line.split("\t")
27
- chr,pos,ref,mut = parts.values_at 13, 14, 25, 26
28
- next if ref == 'na' or mut == 'na'
29
-
30
- pos, muts = Misc.correct_mutation(pos.to_i,ref,mut)
31
- muts.each do |mut|
32
- mutation = [chr,pos,mut] * ":"
33
- res << ([mutation] + parts[1..12] + parts[15..23]) * "\t"
34
- end
35
- end
36
74
  res.extend MultipleResult
75
+ mis.each do |mi|
76
+ res << [mi, values[0..fields-1]]
77
+ end
37
78
  res
38
79
  rescue
39
80
  Log.exception $!
40
81
  raise $!
41
82
  end
42
83
  end
43
- Misc.sort_stream(io)
84
+ dumper.stream
44
85
  end
45
86
 
46
- ClinVar.claim ClinVar.mi_summary, :proc do
87
+ ClinVar.claim ClinVar.hg38.mi_summary, :proc do
47
88
  require 'rbbt/workflow'
48
89
  Workflow.require_workflow "Sequence"
49
- variants = ClinVar.snv_summary.produce
90
+ variants = ClinVar.hg38.snv_summary.produce
50
91
  muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
51
- consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true).clean.run(true)
92
+ consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg38).clean.run(true)
52
93
 
53
94
  options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
54
95
  fields = options[:fields].length
@@ -74,3 +115,10 @@ module ClinVar
74
115
  end
75
116
  end
76
117
 
118
+ if __FILE__ == $0
119
+ Log.severity = 0
120
+ ClinVar.hg19.snv_summary.produce
121
+ ClinVar.hg19.mi_summary.produce(true)
122
+ ClinVar.hg38.snv_summary.produce
123
+ ClinVar.hg38.mi_summary.produce(true)
124
+ end
@@ -46,6 +46,12 @@ module Organism
46
46
  Rbbt.share.install.Organism.find.glob('???').collect{|f| File.basename(f)}
47
47
  end
48
48
 
49
+ def self.prepared_organisms
50
+ Rbbt.share.organisms.glob_all("???/???????/{identifiers,chromosome_1,scientific_name}").collect{|f|
51
+ [File.basename(File.dirname(File.dirname(f))), File.basename(File.dirname(f))] * "/"
52
+ }.uniq
53
+ end
54
+
49
55
  def self.installable_organisms
50
56
  self.installed_organisms
51
57
  end
@@ -21,7 +21,7 @@ $biomart_lexicon = [
21
21
  $biomart_identifiers = [
22
22
  ['Entrez Gene ID', "entrezgene"],
23
23
  ['Ensembl Protein ID', "ensembl_peptide_id" ],
24
- ['Associated Gene Name' , "rgd_symbol"],
24
+ ['Associated Gene Name' , "external_gene_name"],
25
25
  ['Protein ID' , "protein_id"] ,
26
26
  ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
27
27
  ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
@@ -42,6 +42,14 @@ $biomart_identifiers = [
42
42
  #['Codelink ID ', "codelink"],
43
43
  ]
44
44
 
45
+ $biomart_protein_identifiers = [
46
+ [ 'Protein ID', "protein_id" ],
47
+ [ 'RefSeq Protein ID', "refseq_peptide" ],
48
+ [ 'Unigene ID', "unigene" ],
49
+ [ 'UniProt/SwissProt ID', "uniprot_swissprot"],
50
+ [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession"],
51
+ ]
52
+
45
53
  $namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
46
54
  Thread.current["namespace"] = File.basename(File.dirname(File.expand_path(__FILE__)))
47
55
  load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.51
4
+ version: 3.1.52
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-28 00:00:00.000000000 Z
11
+ date: 2021-04-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -176,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
176
176
  - !ruby/object:Gem::Version
177
177
  version: '0'
178
178
  requirements: []
179
- rubygems_version: 3.0.6
179
+ rubygems_version: 3.1.4
180
180
  signing_key:
181
181
  specification_version: 4
182
182
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)