rbbt-sources 3.1.51 → 3.1.52
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +0 -5
- data/lib/rbbt/sources/clinvar.rb +74 -26
- data/lib/rbbt/sources/organism.rb +6 -0
- data/share/install/Organism/Rno/Rakefile +9 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19722279e8b56d1b0cb9e72001e11333e8e436d3a59bf746be3ed744afd047e1
|
4
|
+
data.tar.gz: 7eca716d5a2ee5ab6ba35547230d01466417f373df775fda110cae79366b56d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9acfd6f4718444fed3891d431e49396d42293105767915375bf86d485e84ed58c5dbb81eff6509ced77e6ae15c38bbbcf91d52590aca6d53e0b3feb4db8c90b6
|
7
|
+
data.tar.gz: 572c173002d2bad704df755542448fc294071c0849bfbac997ce254001591fb463ea8244c167066953224b1e758108ef91417a10b22c02bd0991d819efc68453
|
data/lib/rbbt/sources/clinvar.rb
CHANGED
@@ -1,54 +1,95 @@
|
|
1
1
|
require 'rbbt-util'
|
2
2
|
require 'rbbt/resource'
|
3
|
+
require 'rbbt/sources/organism'
|
3
4
|
|
4
5
|
module ClinVar
|
5
6
|
extend Resource
|
6
7
|
self.subdir = 'share/databases/ClinVar'
|
7
8
|
|
8
|
-
def self.
|
9
|
-
Organism.
|
9
|
+
def self.organism_hg19(org="Hsa")
|
10
|
+
Organism.organism_for_build("hg19")
|
10
11
|
end
|
11
12
|
|
13
|
+
def self.organism_hg38(org="Hsa")
|
14
|
+
Organism.organism_for_build("hg38")
|
15
|
+
end
|
16
|
+
|
17
|
+
|
12
18
|
ClinVar.claim ClinVar.variant_summary, :url, "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
|
13
19
|
|
14
|
-
ClinVar.claim ClinVar.snv_summary, :proc do
|
15
|
-
|
16
|
-
|
17
|
-
|
20
|
+
ClinVar.claim ClinVar.hg19.snv_summary, :proc do
|
21
|
+
parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
|
22
|
+
dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg19
|
23
|
+
dumper.init
|
24
|
+
chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
|
25
|
+
TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
|
26
|
+
chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
|
27
|
+
next if assembly != "GRCh37"
|
28
|
+
pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
|
29
|
+
res = muts.collect{|m| [[chr, pos, m] * ":", values] }
|
30
|
+
|
31
|
+
res.extend MultipleResult
|
32
|
+
|
33
|
+
res
|
34
|
+
end
|
35
|
+
dumper.stream
|
36
|
+
end
|
37
|
+
|
38
|
+
ClinVar.claim ClinVar.hg38.snv_summary, :proc do
|
39
|
+
parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
|
40
|
+
dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg38
|
41
|
+
dumper.init
|
42
|
+
chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
|
43
|
+
TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
|
44
|
+
chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
|
45
|
+
next if assembly != "GRCh38"
|
46
|
+
pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
|
47
|
+
res = muts.collect{|m| [[chr, pos, m] * ":", values] }
|
48
|
+
|
49
|
+
res.extend MultipleResult
|
50
|
+
|
51
|
+
res
|
52
|
+
end
|
53
|
+
dumper.stream
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
ClinVar.claim ClinVar.hg19.mi_summary, :proc do
|
58
|
+
require 'rbbt/workflow'
|
59
|
+
Workflow.require_workflow "Sequence"
|
60
|
+
variants = ClinVar.hg19.snv_summary.produce
|
61
|
+
muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
|
62
|
+
consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19).clean.run(true)
|
63
|
+
|
64
|
+
options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
|
65
|
+
fields = options[:fields].length
|
66
|
+
dumper = TSV::Dumper.new options
|
67
|
+
dumper.init
|
68
|
+
pasted = TSV.paste_streams([variants, TSV.get_stream(consequence)])
|
69
|
+
TSV.traverse pasted, :into => dumper, :bar => true do |mutation,values|
|
18
70
|
begin
|
71
|
+
mis = values[fields..-1].flatten
|
72
|
+
next if mis.empty?
|
19
73
|
res = []
|
20
|
-
if line =~ /^#/
|
21
|
-
parts = line.split("\t")
|
22
|
-
res << (["#Genomic Mutation"] + parts[1..12] + parts[15..23]) * "\t"
|
23
|
-
else
|
24
|
-
next unless line =~ /GRCh37/
|
25
|
-
next if line =~ /(copy number|NT expansion|duplication|indel)/
|
26
|
-
parts = line.split("\t")
|
27
|
-
chr,pos,ref,mut = parts.values_at 13, 14, 25, 26
|
28
|
-
next if ref == 'na' or mut == 'na'
|
29
|
-
|
30
|
-
pos, muts = Misc.correct_mutation(pos.to_i,ref,mut)
|
31
|
-
muts.each do |mut|
|
32
|
-
mutation = [chr,pos,mut] * ":"
|
33
|
-
res << ([mutation] + parts[1..12] + parts[15..23]) * "\t"
|
34
|
-
end
|
35
|
-
end
|
36
74
|
res.extend MultipleResult
|
75
|
+
mis.each do |mi|
|
76
|
+
res << [mi, values[0..fields-1]]
|
77
|
+
end
|
37
78
|
res
|
38
79
|
rescue
|
39
80
|
Log.exception $!
|
40
81
|
raise $!
|
41
82
|
end
|
42
83
|
end
|
43
|
-
|
84
|
+
dumper.stream
|
44
85
|
end
|
45
86
|
|
46
|
-
ClinVar.claim ClinVar.mi_summary, :proc do
|
87
|
+
ClinVar.claim ClinVar.hg38.mi_summary, :proc do
|
47
88
|
require 'rbbt/workflow'
|
48
89
|
Workflow.require_workflow "Sequence"
|
49
|
-
variants = ClinVar.snv_summary.produce
|
90
|
+
variants = ClinVar.hg38.snv_summary.produce
|
50
91
|
muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
|
51
|
-
consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true).clean.run(true)
|
92
|
+
consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg38).clean.run(true)
|
52
93
|
|
53
94
|
options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
|
54
95
|
fields = options[:fields].length
|
@@ -74,3 +115,10 @@ module ClinVar
|
|
74
115
|
end
|
75
116
|
end
|
76
117
|
|
118
|
+
if __FILE__ == $0
|
119
|
+
Log.severity = 0
|
120
|
+
ClinVar.hg19.snv_summary.produce
|
121
|
+
ClinVar.hg19.mi_summary.produce(true)
|
122
|
+
ClinVar.hg38.snv_summary.produce
|
123
|
+
ClinVar.hg38.mi_summary.produce(true)
|
124
|
+
end
|
@@ -46,6 +46,12 @@ module Organism
|
|
46
46
|
Rbbt.share.install.Organism.find.glob('???').collect{|f| File.basename(f)}
|
47
47
|
end
|
48
48
|
|
49
|
+
def self.prepared_organisms
|
50
|
+
Rbbt.share.organisms.glob_all("???/???????/{identifiers,chromosome_1,scientific_name}").collect{|f|
|
51
|
+
[File.basename(File.dirname(File.dirname(f))), File.basename(File.dirname(f))] * "/"
|
52
|
+
}.uniq
|
53
|
+
end
|
54
|
+
|
49
55
|
def self.installable_organisms
|
50
56
|
self.installed_organisms
|
51
57
|
end
|
@@ -21,7 +21,7 @@ $biomart_lexicon = [
|
|
21
21
|
$biomart_identifiers = [
|
22
22
|
['Entrez Gene ID', "entrezgene"],
|
23
23
|
['Ensembl Protein ID', "ensembl_peptide_id" ],
|
24
|
-
['Associated Gene Name' , "
|
24
|
+
['Associated Gene Name' , "external_gene_name"],
|
25
25
|
['Protein ID' , "protein_id"] ,
|
26
26
|
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
27
27
|
['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
|
@@ -42,6 +42,14 @@ $biomart_identifiers = [
|
|
42
42
|
#['Codelink ID ', "codelink"],
|
43
43
|
]
|
44
44
|
|
45
|
+
$biomart_protein_identifiers = [
|
46
|
+
[ 'Protein ID', "protein_id" ],
|
47
|
+
[ 'RefSeq Protein ID', "refseq_peptide" ],
|
48
|
+
[ 'Unigene ID', "unigene" ],
|
49
|
+
[ 'UniProt/SwissProt ID', "uniprot_swissprot"],
|
50
|
+
[ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession"],
|
51
|
+
]
|
52
|
+
|
45
53
|
$namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
|
46
54
|
Thread.current["namespace"] = File.basename(File.dirname(File.expand_path(__FILE__)))
|
47
55
|
load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.51
|
4
|
+
version: 3.1.52
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -176,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
176
176
|
- !ruby/object:Gem::Version
|
177
177
|
version: '0'
|
178
178
|
requirements: []
|
179
|
-
rubygems_version: 3.
|
179
|
+
rubygems_version: 3.1.4
|
180
180
|
signing_key:
|
181
181
|
specification_version: 4
|
182
182
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|