rbbt-sources 3.0.30 → 3.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/clinvar.rb +78 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bcfbf7826895a76991509420782984e70d8385fe
|
4
|
+
data.tar.gz: 48624bc5d1a71539a4006470229df78631499f2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eec55666221f0aa738a831b19b6371887bd981565ff0e4b3d4056ab90b2b1f2874b2375379c272d2177e0af8172aba58441f682c78a5c6aa78567f2eb332e7da
|
7
|
+
data.tar.gz: 8b35c2fce2244f219512cc01b1e6d9ce2d2fd598c87c3bbc307fee443cb3ef5c54b9d751b7b22d914ab174e8aa387c3cbd5e69b969e2c5e53b61e31cc1ed0ae3
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/resource'
|
3
|
+
|
4
|
+
# Rbbt resource declarations for the NCBI ClinVar variant summary.
#
# Three resources are claimed under share/databases/ClinVar:
#
# * variant_summary - the tab-delimited file fetched from the NCBI FTP site.
# * snv_summary     - rows of variant_summary that mention GRCh37, re-keyed
#                     by a "chr:pos:mut" string and sorted.
# * mi_summary      - rows of snv_summary re-keyed by the mutated isoforms
#                     reported by the Sequence workflow.
module ClinVar
  extend Resource
  self.subdir = 'share/databases/ClinVar'

  # Delegates to Organism.default_code with the given organism code
  # (defaults to "Hsa").
  #
  # NOTE(review): Organism is not required by this file; this assumes it has
  # been loaded elsewhere before the method is called - confirm.
  def self.organism(org="Hsa")
    Organism.default_code(org)
  end

  ClinVar.claim ClinVar.variant_summary, :url, "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"

  # Builds snv_summary by streaming variant_summary line by line.
  # Every emitted row keeps columns 1..12 and 15..23 of the input line,
  # prefixed by the new key column.
  ClinVar.claim ClinVar.snv_summary, :proc do
    io = TSV.traverse ClinVar.variant_summary, :type => :array, :into => :stream do |line|
      line = Misc.fixutf8 line
      begin
        res = []
        if line =~ /^#/
          # Header line: same column selection, with the key column renamed.
          parts = line.split("\t")
          res << (["#Genomic Mutation"] + parts[1..12] + parts[15..23]) * "\t"
        else
          # Keep only lines mentioning GRCh37 and drop the listed variant kinds.
          next unless line =~ /GRCh37/
          next if line =~ /(copy number|NT expansion|duplication|indel)/
          parts = line.split("\t")
          chr, pos, ref, mut = parts.values_at 13, 14, 25, 26
          next if ref == 'na' || mut == 'na'

          # Misc.correct_mutation returns a position and a list of mutations;
          # presumably it normalizes the ref/alt pair - confirm against rbbt-util.
          pos, muts = Misc.correct_mutation(pos.to_i, ref, mut)
          muts.each do |corrected|
            mutation = [chr, pos, corrected] * ":"
            res << ([mutation] + parts[1..12] + parts[15..23]) * "\t"
          end
        end
        # One input line may yield zero, one or several output rows.
        res.extend MultipleResult
        res
      rescue => e
        # Log, then propagate the same exception object to the traversal.
        Log.exception e
        raise e
      end
    end
    Misc.sort_stream(io)
  end

  # Builds mi_summary: pastes snv_summary together with the output of the
  # Sequence workflow's mutated_isoforms_fast job and emits one row per
  # mutated isoform, carrying the snv_summary values of its mutation.
  ClinVar.claim ClinVar.mi_summary, :proc do
    require 'rbbt/workflow'
    Workflow.require_workflow "Sequence"
    variants = ClinVar.snv_summary.produce
    # First column of snv_summary (the mutation key) piped into the job.
    muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
    consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true).clean.run(true)

    options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
    # Number of snv_summary value fields; anything past this index in the
    # pasted rows comes from the consequence stream.
    fields = options[:fields].length
    dumper = TSV::Dumper.new options
    dumper.init
    pasted = TSV.paste_streams([variants, TSV.get_stream(consequence)])
    TSV.traverse pasted, :into => dumper, :bar => true do |mutation,values|
      begin
        mis = values[fields..-1].flatten
        next if mis.empty?
        res = mis.map { |mi| [mi, values[0..fields-1]] }
        res.extend MultipleResult
        res
      rescue => e
        Log.exception e
        raise e
      end
    end
    dumper.stream
  end
end
|
76
|
+
|
77
|
+
# Script entry point: only when this file is executed directly, call
# produce(true) on mi_summary and pass the result of `find` to `iif`.
# NOTE(review): `iif` is not defined in this file - presumably a debug-print
# helper provided by rbbt-util; confirm.
iif ClinVar.mi_summary.produce(true).find if __FILE__ == $0
|
78
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.30
|
4
|
+
version: 3.0.31
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -99,6 +99,7 @@ files:
|
|
99
99
|
- lib/rbbt/sources/bibtex.rb
|
100
100
|
- lib/rbbt/sources/biomart.rb
|
101
101
|
- lib/rbbt/sources/cath.rb
|
102
|
+
- lib/rbbt/sources/clinvar.rb
|
102
103
|
- lib/rbbt/sources/corum.rb
|
103
104
|
- lib/rbbt/sources/ensembl.rb
|
104
105
|
- lib/rbbt/sources/ensembl_ftp.rb
|