germ 0.1 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/vcf.rb CHANGED
@@ -1,10 +1,13 @@
1
1
  require 'mutation_set'
2
- require 'oncotator'
3
2
  require 'yaml'
4
3
 
5
- class VCF < MutationSet::Sample
6
- requires "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT"
4
+ class VCF < Mutation::Collection
5
+ header_on
6
+ requires :chrom => :str, :pos => :int, :id => :str, :ref => :str,
7
+ :alt => :str, :qual => :str, :filter => :str, :info => [ ";", "=" ]
8
+ might_have :format => :str
7
9
  comments "##"
10
+ attr_reader :samples
8
11
 
9
12
  class Preamble
10
13
  def initialize lines
@@ -36,50 +39,49 @@ class VCF < MutationSet::Sample
36
39
  end
37
40
  end
38
41
 
39
- def enforce_headers(array)
42
+ def enforce_header
40
43
  # kludge for empty vcf with no format line
41
- missing = required.map(&:downcase) - array.map(&:downcase)
42
- raise "VCF lacks required headers" if !missing.empty? && !(missing.first == "format" && missing.size == 1)
44
+ super
43
45
 
44
- if array.size > required.size
45
- @samples = array - required
46
+ if header.size > required.size
47
+ @samples = @header - required - [ :format ]
48
+ # recover the original sample name from the sleeve
49
+ new_samples = @samples.map do |s|
50
+ @sleeve[s].to_sym
51
+ end
52
+ @header = @header - @samples + new_samples
53
+ @samples = new_samples
46
54
  end
47
-
48
- @headers = array.map &:to_sym
49
55
  end
50
56
 
51
- class Line < MutationSet::Line
52
- attr_reader :format, :mutation
57
+ class Line < Mutation::Record
58
+ alias_key :seqname, :chrom
53
59
  alias_key :start, :pos
54
- alias_key :ref_allele, :ref
55
- def alt_allele; pick_alt; end
56
- def stop; @stop || end_pos; end
57
- def stop= nc; @stop = nc; end
58
-
59
- def required
60
- sample.required
61
- end
62
-
63
- def initialize(fields, s)
64
- @sample = s
65
- @mutation = Hash[clean_required.zip(fields[0...required.size])]
66
- @mutation[:info] = Hash[@mutation[:info].split(/;/).map do |s|
67
- key, value = s.split(/=/)
68
- value ||= true
69
- [ key.to_sym, value ]
70
- end]
71
- @format = @mutation[:format] = @mutation[:format].split(/:/).map(&:to_sym)
72
-
73
- if @sample.samples
74
- sample_fields = fields[required.size..-1]
75
- @genotypes = {}
76
- @sample.samples.each_with_index do |s,i|
77
- next if !sample_fields[i]
78
- @genotypes[s] = VCF::Genotype.new self, sample_fields[i].split(/:/)
60
+ alias_key :stop, :default_stop
61
+ def initialize(h, s)
62
+ super h, s
63
+
64
+ self.format = self.format.split(/:/).map{|f| f.to_sym} if self.format
65
+
66
+ build_genotypes
67
+ build_muts
68
+ end
69
+
70
+ def build_genotypes
71
+ @genotypes = {}
72
+ if @table.samples
73
+ @table.samples.each do |s|
74
+ @genotypes[s] = VCF::Genotype.new self, self.send(s)
79
75
  end
80
76
  end
81
77
  end
82
78
 
79
+ def build_muts
80
+ @table.samples.each do |s|
81
+ @muts.push Mutation.new(chrom, pos, ref, alt, genotype(s).ref_count, genotype(s).alt_count)
82
+ end
83
+ end
84
+
83
85
  def skip_genotype? g
84
86
  name, geno = g.first
85
87
  geno = genotype(geno)
@@ -91,72 +93,74 @@ class VCF < MutationSet::Sample
91
93
  alt.split(/,/).first
92
94
  end
93
95
 
94
- def end_pos
95
- pos.to_i + ref.length-1
96
- end
97
-
98
- def to_s
99
- clean_required.map{ |h|
100
- case h
101
- when :info
102
- mutation[h].map{|k,v| "#{k}=#{v}" }.join(";")
103
- when :format
104
- mutation[h].join(":")
105
- else
106
- mutation[h]
107
- end
108
- }.join("\t") + "\t" + sample.samples.map{|s| genotype(s).to_s }.join("\t")
96
+ def format_column column
97
+ if column == :format
98
+ self.format.join(":")
99
+ elsif genotype(column)
100
+ genotype(column).to_s
101
+ else
102
+ super(column)
103
+ end
109
104
  end
110
105
 
111
106
  def genotype(s)
112
- @genotypes[s] if @genotypes
113
- end
114
-
115
- def clean_required
116
- sample.clean_headers[0...required.size]
107
+ @genotypes[s.to_sym] if @genotypes
117
108
  end
118
109
  end
110
+ line_class VCF::Line
119
111
 
120
112
  class Genotype
121
- attr_reader :info
122
113
  def initialize(line,field)
123
114
  @line = line
124
- @info = Hash[line.format.zip(field)]
115
+ @hash = Hash[line.format.map(&:downcase).zip(field.split /:/)]
116
+ end
117
+
118
+ def method_missing sym, *args, &block
119
+ if @hash[sym]
120
+ @hash[sym]
121
+ elsif sym.to_s =~ /(.*)=/
122
+ @hash[$1.to_sym] = args.first
123
+ else
124
+ super sym, *args, &block
125
+ end
126
+ end
127
+
128
+ def respond_to_missing? sym, include_all = false
129
+ @hash[sym] || super
125
130
  end
126
131
 
127
132
  def homozygous?
128
- @info[:GT] =~ /0.0/ || @info[:GT] =~ /1.1/
133
+ gt =~ /0.0/ || gt =~ /1.1/
129
134
  end
130
135
 
131
136
  def heterozygous?
132
- @info[:GT] =~ /0.1/ || @info[:GT] =~ /1.0/
137
+ gt =~ /0.1/ || gt =~ /1.0/
133
138
  end
134
139
 
135
140
  def empty?
136
- @info[:GT] =~ /\..\./
141
+ gt =~ /\..\./
137
142
  end
138
143
 
139
144
  def callable?
140
- @info[:GT] !~ /\..\./
145
+ gt !~ /\..\./
141
146
  end
142
147
 
143
- def gt; @info[:GT]; end
144
- def approx_depth; @info[:DP].to_i; end
148
+ def approx_depth; dp.to_i; end
145
149
  def depth; alt_count + ref_count; end
146
- def alt_count; @info[:AD] ? @info[:AD].split(/,/)[1].to_i : nil; end
147
- def ref_count; @info[:AD] ? @info[:AD].split(/,/)[0].to_i : nil; end
150
+ def alt_count; @alt_count ||= respond_to?(:ad) ? ad.split(/,/)[1].to_i : nil; end
151
+ def ref_count; @ref_count ||= respond_to?(:ad) ? ad.split(/,/)[0].to_i : nil; end
148
152
  def alt_freq; alt_count / depth.to_f; end
149
153
  def ref_freq; ref_count / depth.to_f; end
150
154
  def ref_length; @line.ref.length; end
151
155
  def alt_length; @line.alt.length; end
152
- def alt_base_quality; @info[:NQSBQ] ? @info[:NQSBQ].split(/,/)[0].to_f : nil; end
153
- def alt_map_quality; @info[:MQS] ? @info[:MQS].split(/,/)[0].to_f : nil; end
154
- def alt_mismatch_rate; @info[:NQSMM] ? @info[:NQSMM].split(/,/)[0].to_f : nil; end
155
- def alt_mismatch_count; @info[:MM] ? @info[:MM].split(/,/)[0].to_f : nil; end
156
- def quality; @info[:GQ].to_i; end
156
+ def alt_base_quality; respond_to?(:nqsbq) ? nqsbq.split(/,/)[0].to_f : nil; end
157
+ def alt_map_quality; respond_to?(:mqs) ? mqs.split(/,/)[0].to_f : nil; end
158
+ def alt_mismatch_rate; respond_to?(:nqsmm) ? nqsmm.split(/,/)[0].to_f : nil; end
159
+ def alt_mismatch_count; respond_to?(:mm) ? mm.split(/,/)[0].to_f : nil; end
160
+ def quality; gq.to_i; end
157
161
 
158
162
  def to_s
159
- @line.format.map{|f| @info[f]}.join(":")
163
+ @line.format.map(&:downcase).map{|f| @hash[f]}.join(":")
160
164
  end
161
165
  end
162
166
  end
metadata CHANGED
@@ -1,62 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: germ
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
5
- prerelease:
4
+ version: '0.3'
6
5
  platform: ruby
7
6
  authors:
8
7
  - Saurabh Asthana
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-07-16 00:00:00.000000000 Z
11
+ date: 2014-11-04 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: extlib
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '>='
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: net-http-persistent
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - '>='
36
32
  - !ruby/object:Gem::Version
37
33
  version: '0'
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - '>='
44
39
  - !ruby/object:Gem::Version
45
40
  version: '0'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: sequel
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ! '>='
45
+ - - '>='
52
46
  - !ruby/object:Gem::Version
53
47
  version: '0'
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ! '>='
52
+ - - '>='
60
53
  - !ruby/object:Gem::Version
61
54
  version: '0'
62
55
  description: See summary
@@ -67,49 +60,56 @@ extensions:
67
60
  - ext/hash_table_aux/extconf.rb
68
61
  extra_rdoc_files: []
69
62
  files:
70
- - lib/gtf.rb
71
- - lib/mutation_set.rb
72
- - lib/indelocator.rb
73
- - lib/maf.rb
74
- - lib/hash_table.rb
75
- - lib/sam.rb
76
- - lib/germ/config.rb
77
- - lib/germ/printer.rb
78
- - lib/germ/data_types.rb
79
- - lib/germ/flagstat.rb
80
63
  - lib/oncotator.rb
64
+ - lib/gtf/gene.rb
65
+ - lib/sam.rb
66
+ - lib/mutation.rb
67
+ - lib/hash_table.rb
68
+ - lib/genomic_locus.rb
69
+ - lib/intervals.rb
70
+ - lib/genetic_code.rb
71
+ - lib/go.rb
72
+ - lib/germ.rb
73
+ - lib/mutation_set.rb
74
+ - lib/gtf.rb
81
75
  - lib/mutect.rb
82
76
  - lib/vcf.rb
83
- - lib/germ.rb
84
- - lib/intervals.rb
85
77
  - lib/fasta.rb
78
+ - lib/indelocator.rb
79
+ - lib/sdrf.rb
80
+ - lib/tcga.rb
81
+ - lib/fastq.rb
82
+ - lib/maf.rb
83
+ - lib/germ/flagstat.rb
84
+ - lib/germ/data_types.rb
85
+ - lib/germ/printer.rb
86
+ - lib/germ/config.rb
86
87
  - ext/fasta_aux/FastaAux.c
87
88
  - ext/hash_table_aux/HashTableAux.c
88
89
  - ext/fasta_aux/extconf.rb
89
90
  - ext/hash_table_aux/extconf.rb
90
91
  homepage: http://github.com/mountetna/germ
91
92
  licenses: []
93
+ metadata: {}
92
94
  post_install_message:
93
95
  rdoc_options: []
94
96
  require_paths:
95
97
  - lib
96
98
  - ext
97
99
  required_ruby_version: !ruby/object:Gem::Requirement
98
- none: false
99
100
  requirements:
100
- - - ! '>='
101
+ - - '>='
101
102
  - !ruby/object:Gem::Version
102
103
  version: '0'
103
104
  required_rubygems_version: !ruby/object:Gem::Requirement
104
- none: false
105
105
  requirements:
106
- - - ! '>='
106
+ - - '>='
107
107
  - !ruby/object:Gem::Version
108
108
  version: '0'
109
109
  requirements: []
110
110
  rubyforge_project:
111
- rubygems_version: 1.8.23
111
+ rubygems_version: 2.0.6
112
112
  signing_key:
113
- specification_version: 3
113
+ specification_version: 4
114
114
  summary: Collection of utilities for use in computational genomics
115
115
  test_files: []