rbbt-sources 3.1.51 → 3.1.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/etc/allowed_biomart_archives +0 -5
- data/lib/rbbt/sources/clinvar.rb +74 -26
- data/lib/rbbt/sources/organism.rb +6 -0
- data/share/install/Organism/Rno/Rakefile +9 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19722279e8b56d1b0cb9e72001e11333e8e436d3a59bf746be3ed744afd047e1
|
4
|
+
data.tar.gz: 7eca716d5a2ee5ab6ba35547230d01466417f373df775fda110cae79366b56d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9acfd6f4718444fed3891d431e49396d42293105767915375bf86d485e84ed58c5dbb81eff6509ced77e6ae15c38bbbcf91d52590aca6d53e0b3feb4db8c90b6
|
7
|
+
data.tar.gz: 572c173002d2bad704df755542448fc294071c0849bfbac997ce254001591fb463ea8244c167066953224b1e758108ef91417a10b22c02bd0991d819efc68453
|
data/lib/rbbt/sources/clinvar.rb
CHANGED
@@ -1,54 +1,95 @@
|
|
1
1
|
require 'rbbt-util'
|
2
2
|
require 'rbbt/resource'
|
3
|
+
require 'rbbt/sources/organism'
|
3
4
|
|
4
5
|
module ClinVar
|
5
6
|
extend Resource
|
6
7
|
self.subdir = 'share/databases/ClinVar'
|
7
8
|
|
8
|
-
def self.
|
9
|
-
Organism.
|
9
|
+
def self.organism_hg19(org="Hsa")
|
10
|
+
Organism.organism_for_build("hg19")
|
10
11
|
end
|
11
12
|
|
13
|
+
def self.organism_hg38(org="Hsa")
|
14
|
+
Organism.organism_for_build("hg38")
|
15
|
+
end
|
16
|
+
|
17
|
+
|
12
18
|
ClinVar.claim ClinVar.variant_summary, :url, "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
|
13
19
|
|
14
|
-
ClinVar.claim ClinVar.snv_summary, :proc do
|
15
|
-
|
16
|
-
|
17
|
-
|
20
|
+
ClinVar.claim ClinVar.hg19.snv_summary, :proc do
|
21
|
+
parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
|
22
|
+
dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg19
|
23
|
+
dumper.init
|
24
|
+
chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
|
25
|
+
TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
|
26
|
+
chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
|
27
|
+
next if assembly != "GRCh37"
|
28
|
+
pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
|
29
|
+
res = muts.collect{|m| [[chr, pos, m] * ":", values] }
|
30
|
+
|
31
|
+
res.extend MultipleResult
|
32
|
+
|
33
|
+
res
|
34
|
+
end
|
35
|
+
dumper.stream
|
36
|
+
end
|
37
|
+
|
38
|
+
ClinVar.claim ClinVar.hg38.snv_summary, :proc do
|
39
|
+
parser = TSV::Parser.new ClinVar.variant_summary, :type => :list
|
40
|
+
dumper = TSV::Dumper.new :fields => parser.fields, :key_field => "Genomic Mutation", :organism => ClinVar.organism_hg38
|
41
|
+
dumper.init
|
42
|
+
chr_pos, start_pos, ref_pos, alt_pos, assembly_pos = %w(Chromosome PositionVCF ReferenceAlleleVCF AlternateAlleleVCF Assembly).collect{|f| parser.fields.index f}
|
43
|
+
TSV.traverse parser, :into => dumper, :bar => true do |allele,values,fields|
|
44
|
+
chr, start, ref, alt, assembly = values.values_at chr_pos, start_pos, ref_pos, alt_pos, assembly_pos
|
45
|
+
next if assembly != "GRCh38"
|
46
|
+
pos, muts = Misc.correct_vcf_mutation(start.to_i, ref, alt)
|
47
|
+
res = muts.collect{|m| [[chr, pos, m] * ":", values] }
|
48
|
+
|
49
|
+
res.extend MultipleResult
|
50
|
+
|
51
|
+
res
|
52
|
+
end
|
53
|
+
dumper.stream
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
ClinVar.claim ClinVar.hg19.mi_summary, :proc do
|
58
|
+
require 'rbbt/workflow'
|
59
|
+
Workflow.require_workflow "Sequence"
|
60
|
+
variants = ClinVar.hg19.snv_summary.produce
|
61
|
+
muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
|
62
|
+
consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19).clean.run(true)
|
63
|
+
|
64
|
+
options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
|
65
|
+
fields = options[:fields].length
|
66
|
+
dumper = TSV::Dumper.new options
|
67
|
+
dumper.init
|
68
|
+
pasted = TSV.paste_streams([variants, TSV.get_stream(consequence)])
|
69
|
+
TSV.traverse pasted, :into => dumper, :bar => true do |mutation,values|
|
18
70
|
begin
|
71
|
+
mis = values[fields..-1].flatten
|
72
|
+
next if mis.empty?
|
19
73
|
res = []
|
20
|
-
if line =~ /^#/
|
21
|
-
parts = line.split("\t")
|
22
|
-
res << (["#Genomic Mutation"] + parts[1..12] + parts[15..23]) * "\t"
|
23
|
-
else
|
24
|
-
next unless line =~ /GRCh37/
|
25
|
-
next if line =~ /(copy number|NT expansion|duplication|indel)/
|
26
|
-
parts = line.split("\t")
|
27
|
-
chr,pos,ref,mut = parts.values_at 13, 14, 25, 26
|
28
|
-
next if ref == 'na' or mut == 'na'
|
29
|
-
|
30
|
-
pos, muts = Misc.correct_mutation(pos.to_i,ref,mut)
|
31
|
-
muts.each do |mut|
|
32
|
-
mutation = [chr,pos,mut] * ":"
|
33
|
-
res << ([mutation] + parts[1..12] + parts[15..23]) * "\t"
|
34
|
-
end
|
35
|
-
end
|
36
74
|
res.extend MultipleResult
|
75
|
+
mis.each do |mi|
|
76
|
+
res << [mi, values[0..fields-1]]
|
77
|
+
end
|
37
78
|
res
|
38
79
|
rescue
|
39
80
|
Log.exception $!
|
40
81
|
raise $!
|
41
82
|
end
|
42
83
|
end
|
43
|
-
|
84
|
+
dumper.stream
|
44
85
|
end
|
45
86
|
|
46
|
-
ClinVar.claim ClinVar.mi_summary, :proc do
|
87
|
+
ClinVar.claim ClinVar.hg38.mi_summary, :proc do
|
47
88
|
require 'rbbt/workflow'
|
48
89
|
Workflow.require_workflow "Sequence"
|
49
|
-
variants = ClinVar.snv_summary.produce
|
90
|
+
variants = ClinVar.hg38.snv_summary.produce
|
50
91
|
muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
|
51
|
-
consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true).clean.run(true)
|
92
|
+
consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg38).clean.run(true)
|
52
93
|
|
53
94
|
options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
|
54
95
|
fields = options[:fields].length
|
@@ -74,3 +115,10 @@ module ClinVar
|
|
74
115
|
end
|
75
116
|
end
|
76
117
|
|
118
|
+
if __FILE__ == $0
|
119
|
+
Log.severity = 0
|
120
|
+
ClinVar.hg19.snv_summary.produce
|
121
|
+
ClinVar.hg19.mi_summary.produce(true)
|
122
|
+
ClinVar.hg38.snv_summary.produce
|
123
|
+
ClinVar.hg38.mi_summary.produce(true)
|
124
|
+
end
|
@@ -46,6 +46,12 @@ module Organism
|
|
46
46
|
Rbbt.share.install.Organism.find.glob('???').collect{|f| File.basename(f)}
|
47
47
|
end
|
48
48
|
|
49
|
+
def self.prepared_organisms
|
50
|
+
Rbbt.share.organisms.glob_all("???/???????/{identifiers,chromosome_1,scientific_name}").collect{|f|
|
51
|
+
[File.basename(File.dirname(File.dirname(f))), File.basename(File.dirname(f))] * "/"
|
52
|
+
}.uniq
|
53
|
+
end
|
54
|
+
|
49
55
|
def self.installable_organisms
|
50
56
|
self.installed_organisms
|
51
57
|
end
|
@@ -21,7 +21,7 @@ $biomart_lexicon = [
|
|
21
21
|
$biomart_identifiers = [
|
22
22
|
['Entrez Gene ID', "entrezgene"],
|
23
23
|
['Ensembl Protein ID', "ensembl_peptide_id" ],
|
24
|
-
['Associated Gene Name' , "
|
24
|
+
['Associated Gene Name' , "external_gene_name"],
|
25
25
|
['Protein ID' , "protein_id"] ,
|
26
26
|
['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
|
27
27
|
['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
|
@@ -42,6 +42,14 @@ $biomart_identifiers = [
|
|
42
42
|
#['Codelink ID ', "codelink"],
|
43
43
|
]
|
44
44
|
|
45
|
+
$biomart_protein_identifiers = [
|
46
|
+
[ 'Protein ID', "protein_id" ],
|
47
|
+
[ 'RefSeq Protein ID', "refseq_peptide" ],
|
48
|
+
[ 'Unigene ID', "unigene" ],
|
49
|
+
[ 'UniProt/SwissProt ID', "uniprot_swissprot"],
|
50
|
+
[ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession"],
|
51
|
+
]
|
52
|
+
|
45
53
|
$namespace = File.basename(File.dirname(File.expand_path(__FILE__)))
|
46
54
|
Thread.current["namespace"] = File.basename(File.dirname(File.expand_path(__FILE__)))
|
47
55
|
load File.join(File.dirname(__FILE__), '../organism_helpers.rb')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.52
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -176,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
176
176
|
- !ruby/object:Gem::Version
|
177
177
|
version: '0'
|
178
178
|
requirements: []
|
179
|
-
rubygems_version: 3.
|
179
|
+
rubygems_version: 3.1.4
|
180
180
|
signing_key:
|
181
181
|
specification_version: 4
|
182
182
|
summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
|