rbbt-sources 3.0.30 → 3.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/sources/clinvar.rb +78 -0
  3. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3770be50c944954cd02c46a5a28d7c0502810b6b
4
- data.tar.gz: b9fce35ae48f309058f314f77e72ca0685262542
3
+ metadata.gz: bcfbf7826895a76991509420782984e70d8385fe
4
+ data.tar.gz: 48624bc5d1a71539a4006470229df78631499f2d
5
5
  SHA512:
6
- metadata.gz: ceb660cc457717662da4ffc3b56159f98f5699e11c59d3ea8f896e03c3b8bb670e7085e953b1088497818255253a7543ef448863219562296391ff3a4d59170f
7
- data.tar.gz: b9c443b486bd3b06523b1e2b520c1acbcced1c7d3aa1c1d582df4244cc6a1a95e313588db2809f829677bb41bb1b0bdf8b861ea5d6e6ef08e6bc5fd81ef05c88
6
+ metadata.gz: eec55666221f0aa738a831b19b6371887bd981565ff0e4b3d4056ab90b2b1f2874b2375379c272d2177e0af8172aba58441f682c78a5c6aa78567f2eb332e7da
7
+ data.tar.gz: 8b35c2fce2244f219512cc01b1e6d9ce2d2fd598c87c3bbc307fee443cb3ef5c54b9d751b7b22d914ab174e8aa387c3cbd5e69b969e2c5e53b61e31cc1ed0ae3
@@ -0,0 +1,78 @@
1
+ require 'rbbt-util'
2
+ require 'rbbt/resource'
3
+
4
+ module ClinVar
5
+ extend Resource
6
+ self.subdir = 'share/databases/ClinVar'
7
+
8
+ def self.organism(org="Hsa")
9
+ Organism.default_code(org)
10
+ end
11
+
12
+ ClinVar.claim ClinVar.variant_summary, :url, "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
13
+
14
+ ClinVar.claim ClinVar.snv_summary, :proc do
15
+ url = "ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz"
16
+ io = TSV.traverse ClinVar.variant_summary, :type => :array, :into => :stream do |line|
17
+ line = Misc.fixutf8 line
18
+ begin
19
+ res = []
20
+ if line =~ /^#/
21
+ parts = line.split("\t")
22
+ res << (["#Genomic Mutation"] + parts[1..12] + parts[15..23]) * "\t"
23
+ else
24
+ next unless line =~ /GRCh37/
25
+ next if line =~ /(copy number|NT expansion|duplication|indel)/
26
+ parts = line.split("\t")
27
+ chr,pos,ref,mut = parts.values_at 13, 14, 25, 26
28
+ next if ref == 'na' or mut == 'na'
29
+
30
+ pos, muts = Misc.correct_mutation(pos.to_i,ref,mut)
31
+ muts.each do |mut|
32
+ mutation = [chr,pos,mut] * ":"
33
+ res << ([mutation] + parts[1..12] + parts[15..23]) * "\t"
34
+ end
35
+ end
36
+ res.extend MultipleResult
37
+ res
38
+ rescue
39
+ Log.exception $!
40
+ raise $!
41
+ end
42
+ end
43
+ Misc.sort_stream(io)
44
+ end
45
+
46
+ ClinVar.claim ClinVar.mi_summary, :proc do
47
+ require 'rbbt/workflow'
48
+ Workflow.require_workflow "Sequence"
49
+ variants = ClinVar.snv_summary.produce
50
+ muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
51
+ consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true).clean.run(true)
52
+
53
+ options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
54
+ fields = options[:fields].length
55
+ dumper = TSV::Dumper.new options
56
+ dumper.init
57
+ pasted = TSV.paste_streams([variants, TSV.get_stream(consequence)])
58
+ TSV.traverse pasted, :into => dumper, :bar => true do |mutation,values|
59
+ begin
60
+ mis = values[fields..-1].flatten
61
+ next if mis.empty?
62
+ res = []
63
+ res.extend MultipleResult
64
+ mis.each do |mi|
65
+ res << [mi, values[0..fields-1]]
66
+ end
67
+ res
68
+ rescue
69
+ Log.exception $!
70
+ raise $!
71
+ end
72
+ end
73
+ dumper.stream
74
+ end
75
+ end
76
+
77
+ iif ClinVar.mi_summary.produce(true).find if __FILE__ == $0
78
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.30
4
+ version: 3.0.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-24 00:00:00.000000000 Z
11
+ date: 2015-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -99,6 +99,7 @@ files:
99
99
  - lib/rbbt/sources/bibtex.rb
100
100
  - lib/rbbt/sources/biomart.rb
101
101
  - lib/rbbt/sources/cath.rb
102
+ - lib/rbbt/sources/clinvar.rb
102
103
  - lib/rbbt/sources/corum.rb
103
104
  - lib/rbbt/sources/ensembl.rb
104
105
  - lib/rbbt/sources/ensembl_ftp.rb