cheripic 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/README.md +9 -4
- data/cheripic.gemspec +9 -8
- data/lib/cheripic/bfr.rb +84 -0
- data/lib/cheripic/cmd.rb +26 -33
- data/lib/cheripic/contig.rb +66 -0
- data/lib/cheripic/contig_pileups.rb +143 -0
- data/lib/cheripic/implementer.rb +80 -0
- data/lib/cheripic/options.rb +42 -0
- data/lib/cheripic/pileup.rb +186 -0
- data/lib/cheripic/regions.rb +46 -0
- data/lib/cheripic/variants.rb +201 -0
- data/lib/cheripic/version.rb +1 -1
- data/lib/cheripic.rb +11 -2
- metadata +80 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ace583d5063ea92f69faa4430b71f0bb0f654528
|
4
|
+
data.tar.gz: 00ae530b7c5c162aa0e699cae2c5dcaa9d159673
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c179df9e44bdff364c8c9c7dd2a779609b05ed3d2f0ef5385c5c5ebb4910a98c35a05d861851dea3f34b14ab8d188994e6b7f254dde2b9356e73dbff08386cf0
|
7
|
+
data.tar.gz: f21ee021e4594bacaf319170746ad7655b0c579cb0c49495bf33dbef9dabd059f8d2e81edf44d64d24cea1f86a3e6315782d87fd8e475cf5698d35dce1bd3079
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
# Cheripic
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/cheripic.svg)](https://badge.fury.io/rb/cheripic)
|
3
4
|
[![Build Status](https://travis-ci.org/shyamrallapalli/cheripic.svg?branch=master)](https://travis-ci.org/shyamrallapalli/cheripic)
|
4
5
|
[![Coverage Status](https://coveralls.io/repos/github/shyamrallapalli/cheripic/badge.svg?branch=master)](https://coveralls.io/github/shyamrallapalli/cheripic?branch=master)
|
6
|
+
[![Code Climate](https://codeclimate.com/github/shyamrallapalli/cheripic/badges/gpa.svg)](https://codeclimate.com/github/shyamrallapalli/cheripic)
|
5
7
|
|
6
8
|
|
7
|
-
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/cheripic`. To experiment with that code, run `bin/console` for an interactive prompt.
|
8
|
-
|
9
|
-
TODO: Delete this and the text above, and describe your gem
|
10
9
|
|
10
|
+
Computing Homozygosity Enriched Regions In genomes to Prioritize Identification of Candidate variants (CHERIPIC),
|
11
|
+
is a Ruby tool to pick causative mutations from bulk segregant sequencing.
|
12
|
+
|
13
|
+
Currently this gem is still in development and is nearing a complete working package.
|
14
|
+
|
15
|
+
|
11
16
|
## Installation
|
12
17
|
|
13
18
|
Add this line to your application's Gemfile:
|
@@ -36,7 +41,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
36
41
|
|
37
42
|
## Contributing
|
38
43
|
|
39
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
44
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/shyamrallapalli/cheripic. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
40
45
|
|
41
46
|
|
42
47
|
## License
|
data/cheripic.gemspec
CHANGED
@@ -19,18 +19,19 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.
|
23
|
-
spec.
|
24
|
-
spec.
|
22
|
+
spec.add_runtime_dependency 'yell', '~> 2.0', '>= 2.0.5'
|
23
|
+
spec.add_runtime_dependency 'trollop', '~> 2.1', '>= 2.1.2'
|
24
|
+
spec.add_runtime_dependency 'bio', '~> 1.5', '>= 1.5.0'
|
25
25
|
# spec.add_dependency 'bio-samtools', '~> 2.3.3'
|
26
|
-
|
27
|
-
spec.
|
26
|
+
spec.add_dependency 'bio-gngm', '~> 0.2.1'
|
27
|
+
spec.add_runtime_dependency 'rinruby', '~> 2.0', '>= 2.0.3'
|
28
28
|
|
29
|
+
spec.add_development_dependency 'activesupport', '~> 4.2.6'
|
29
30
|
spec.add_development_dependency 'bundler', '~> 1.10'
|
30
31
|
spec.add_development_dependency 'rake', '~> 10.0'
|
31
32
|
spec.add_development_dependency 'minitest'
|
32
33
|
spec.add_development_dependency 'minitest-reporters', '>= 1.0.17'
|
33
|
-
spec.add_development_dependency 'simplecov', '>= 0.8.2'
|
34
|
-
spec.add_development_dependency 'shoulda', '>= 3.5.0'
|
35
|
-
spec.add_development_dependency 'coveralls', '>= 0.7.2'
|
34
|
+
spec.add_development_dependency 'simplecov', '~> 0.8', '>= 0.8.2'
|
35
|
+
spec.add_development_dependency 'shoulda', '~> 3.5', '>= 3.5.0'
|
36
|
+
spec.add_development_dependency 'coveralls', '~> 0.7', '>= 0.7.2'
|
36
37
|
end
|
data/lib/cheripic/bfr.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Cheripic
|
4
|
+
|
5
|
+
class BfrError < CheripicError; end
|
6
|
+
|
7
|
+
# Bulk frequency ratio (bfr) calculations for hemi snps.
#
# Every method is a class method. Inputs are hashes mapping a base symbol
# (or :ref for the reference base) to its frequency at one position. The
# adjustment factor added to every frequency is read from
# Options.params.bfr_adjust. None of the methods modify the hashes passed in.
class Bfr

  # Instance-level accessor, kept only for backward compatibility; the value
  # used by the calculations is stored on the class (singleton accessor below).
  attr_accessor :bfr_adj

  class << self
    # adjustment factor most recently read from Options.params.bfr_adjust
    attr_accessor :bfr_adj
  end

  # get bulk frequency ratio (bfr) for marked hemi snps only
  # ignore positions with complex variants
  #
  # mut_hash - base frequency hash of the first (mutant) sample
  # bg_hash  - base frequency hash of the second (background) sample,
  #            or '' (default) when there is no second sample
  #
  # Returns the ratio, or '' when the position is complex, i.e. neither hash
  # is "reference plus one allele" or "one non-reference allele only".
  def self.get_bfr(mut_hash, bg_hash='')
    @bfr_adj = Options.params.bfr_adjust
    if bg_hash != ''
      # whichever hash shows enrichment for one of two alleles drives the
      # calculation; the order of these checks is significant
      if ref_and_one_allele?(mut_hash)
        calculate_bfr(mut_hash, bg_hash)
      elsif ref_and_one_allele?(bg_hash)
        calculate_bfr(bg_hash, mut_hash)
      elsif single_non_ref_allele?(mut_hash)
        calculate_bfr(mut_hash, bg_hash)
      elsif single_non_ref_allele?(bg_hash)
        calculate_bfr(bg_hash, mut_hash)
      else # complex
        ''
      end
    elsif ref_and_one_allele?(mut_hash) || single_non_ref_allele?(mut_hash)
      calc_fraction(mut_hash)[0] / @bfr_adj
    else
      ''
    end
  end

  # calculate bfr using both mutant and background bulk information
  #
  # two_key_hash - hash holding one allele (and optionally :ref)
  # other_hash   - hash of the other sample; a missing :ref or missing
  #                allele entry is treated as 0
  #
  # Returns the larger of the two adjusted allele fractions divided by the
  # smaller one, so the ratio is always 1 or greater.
  def self.calculate_bfr(two_key_hash, other_hash)
    adj = adjustment
    frac_1, base = calc_fraction(two_key_hash)
    other_alt = other_hash.fetch(base, 0)
    other_ref = other_hash.fetch(:ref, 0)
    frac_2 = (other_alt + adj) / (other_alt + other_ref + adj)
    frac_1 > frac_2 ? frac_1 / frac_2 : frac_2 / frac_1
  end

  # Adjusted fraction of the non-reference allele in a hash.
  #
  # A missing :ref entry is treated as 0 (the reference can be absent when it
  # fell below noise depth). Only the first two keys of the hash are used.
  #
  # Returns [fraction, base] where base is the non-reference key.
  def self.calc_fraction(hash)
    adj = adjustment
    counts = hash.key?(:ref) ? hash : hash.merge(ref: 0)
    first, second = counts.keys
    base = first == :ref ? second : first
    sum = counts[first] + counts[second] + adj
    [(counts[base] + adj) / sum, base]
  end

  # Adjustment factor; falls back to Options when get_bfr has not run yet,
  # so the helpers can also be called directly.
  def self.adjustment
    @bfr_adj ||= Options.params.bfr_adjust
  end

  # true when the hash holds exactly the reference plus one other allele
  def self.ref_and_one_allele?(hash)
    hash.length == 2 && hash.key?(:ref)
  end

  # true when the hash holds exactly one allele and it is not the reference
  def self.single_non_ref_allele?(hash)
    hash.length == 1 && hash[:ref].nil?
  end

  private_class_method :adjustment, :ref_and_one_allele?, :single_non_ref_allele?

end
|
83
|
+
|
84
|
+
end
|
data/lib/cheripic/cmd.rb
CHANGED
@@ -8,13 +8,14 @@ module Cheripic
|
|
8
8
|
require 'pathname'
|
9
9
|
require 'ostruct'
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
attr_accessor :options
|
12
|
+
|
13
|
+
def initialize(args)
|
14
|
+
@options = parse_arguments(args)
|
14
15
|
check_arguments
|
15
16
|
end
|
16
17
|
|
17
|
-
def parse_arguments
|
18
|
+
def parse_arguments(args)
|
18
19
|
Trollop::with_standard_exception_handling argument_parser do
|
19
20
|
if args.empty? || args.include?('-h') || args.include?('--help')
|
20
21
|
raise Trollop::HelpNeeded
|
@@ -45,7 +46,7 @@ module Cheripic
|
|
45
46
|
opt :output, 'Directory to store results, will be created if not existing',
|
46
47
|
:default => 'cheripic_results'
|
47
48
|
opt :loglevel, 'Choose any one of "info / warn / debug" level for logs generated',
|
48
|
-
:default => '
|
49
|
+
:default => 'debug'
|
49
50
|
opt :hmes_adjust, 'factor added to snp count of each contig to adjust for hme score calculations',
|
50
51
|
:type => Float,
|
51
52
|
:default => 0.5
|
@@ -98,7 +99,7 @@ module Cheripic
|
|
98
99
|
:short => '-r',
|
99
100
|
:type => String,
|
100
101
|
:default => ''
|
101
|
-
opt :
|
102
|
+
opt :bfr_adjust, 'factor added to hemi snp frequency of each parent to adjust for bfr calculations',
|
102
103
|
:type => Float,
|
103
104
|
:default => 0.05
|
104
105
|
opt :examples, 'shows some example commands with explanation'
|
@@ -140,19 +141,20 @@ OPTIONS:
|
|
140
141
|
end
|
141
142
|
|
142
143
|
def check_arguments
|
143
|
-
|
144
|
-
|
145
|
-
|
144
|
+
check_output_dir
|
145
|
+
check_log_level
|
146
|
+
check_input_files
|
146
147
|
end
|
147
148
|
|
149
|
+
# TODO: check bulk input types and process associated files
|
148
150
|
# def check_input_types
|
149
|
-
# if @options
|
151
|
+
# if @options[:input_format] == 'vcf'
|
150
152
|
#
|
151
153
|
# end
|
152
154
|
# end
|
153
155
|
|
154
|
-
def
|
155
|
-
if @options
|
156
|
+
def check_input_files
|
157
|
+
if @options[:polyploidy]
|
156
158
|
inputfiles = %i{assembly mut_bulk bg_bulk mut_parent bg_parent}
|
157
159
|
else
|
158
160
|
inputfiles = %i{assembly mut_bulk bg_bulk}
|
@@ -171,35 +173,26 @@ OPTIONS:
|
|
171
173
|
end
|
172
174
|
end
|
173
175
|
|
174
|
-
def
|
175
|
-
if Dir.exist?(@options
|
176
|
-
raise CheripicArgError.new "#{@options
|
176
|
+
# Refuse to run when the requested output directory already exists.
#
# Reads the directory name from @options[:output].
# Raises CheripicArgError when that directory is present; returns nil otherwise.
def check_output_dir
  return unless Dir.exist?(@options[:output])
  # the two message parts were previously joined without a separator
  raise CheripicArgError.new "#{@options[:output]} directory exists, " +
    'please choose a different output directory name'
end
|
180
182
|
|
181
|
-
def
|
182
|
-
unless %w(error info warn debug).include?(@options
|
183
|
-
raise CheripicArgError.new "Loglevel #{@options
|
183
|
+
def check_log_level
|
184
|
+
unless %w(error info warn debug).include?(@options[:loglevel])
|
185
|
+
raise CheripicArgError.new "Loglevel #{@options[:loglevel]} is not valid. " +
|
184
186
|
'It must be one of: error, info, warn, debug.'
|
185
187
|
end
|
186
|
-
logger.level = Yell::Level.new @options
|
188
|
+
logger.level = Yell::Level.new @options[:loglevel].to_sym
|
187
189
|
end
|
188
190
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
# end
|
195
|
-
#
|
196
|
-
# def analyse_bulks
|
197
|
-
# assembly = @options.assembly
|
198
|
-
# logger.info "Loading assembly: #{assembly}"
|
199
|
-
# # a = Assembly.new assembly
|
200
|
-
# logger.info "Analysing assembly: #{assembly}"
|
201
|
-
#
|
202
|
-
# end
|
191
|
+
def run
|
192
|
+
@options[:output] = File.expand_path @options[:output]
|
193
|
+
analysis = Implementer.new(@options)
|
194
|
+
analysis.run
|
195
|
+
end
|
203
196
|
|
204
197
|
end # Cmd
|
205
198
|
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class ContigError < CheripicError; end
|
8
|
+
|
9
|
+
# One assembly sequence (contig) together with the variant positions
# assigned to it.
#
# hm_pos, ht_pos and hemi_pos are hashes keyed by position; their values are
# the ratios stored by the caller for that position.
class Contig

  # NOTE(review): Enumerable is mixed in but this class defines no #each, so
  # Enumerable methods cannot be used on a Contig as it stands.
  include Enumerable
  extend Forwardable
  # delegate [:size, :length] => :@contig
  # def_delegator :@contig, :entry_id, :id
  attr_accessor :hm_pos, :ht_pos, :hemi_pos, :id, :length

  # fasta - object responding to #entry_id and #length
  def initialize(fasta)
    @id = fasta.entry_id
    @length = fasta.length
    @hm_pos = {}
    @ht_pos = {}
    @hemi_pos = {}
  end

  # number of positions stored in hm_pos
  def hm_num
    hm_pos.length
  end

  # number of positions stored in ht_pos
  def ht_num
    ht_pos.length
  end

  # Ratio of adjusted hm_pos count to adjusted ht_pos count.
  # The adjustment is Options.params.hmes_adjust and is added to both counts.
  # Returns 0.0 when the contig has neither kind of position.
  def hme_score
    return 0.0 if hm_num == 0 && ht_num == 0
    hmes_adjust = Options.params.hmes_adjust
    # to_f keeps the division fractional even with an integer adjustment
    (hm_num + hmes_adjust).to_f / (ht_num + hmes_adjust)
  end

  # number of positions stored in hemi_pos
  def hemi_num
    hemi_pos.length
  end

  # Mean of the numeric values stored in hemi_pos, 0.0 when there are none.
  # Non-numeric entries (such as an empty-string placeholder stored for a
  # position without a usable ratio) are left out so they cannot break the sum.
  def bfr_score
    scores = hemi_pos.values.select { |value| value.is_a?(Numeric) }
    scores.empty? ? 0.0 : geom_mean(scores)
  end

  # Mean of an array of numbers.
  # NOTE(review): despite the name this is the arithmetic mean (sum / size);
  # a log based geometric mean was drafted earlier but is not what runs.
  # Returns 0.0 for an empty array.
  def geom_mean(array)
    return 0.0 if array.empty?
    return array[0].to_f if array.length == 1
    array.reduce(:+) / array.size.to_f
  end

end # Contig
|
65
|
+
|
66
|
+
end # Cheripic
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class ContigPileupsError < CheripicError; end
|
8
|
+
|
9
|
+
# Pileup data of one contig for both bulks and both parents, plus the logic
# that classifies each mutant bulk position.
#
# mut_bulk, bg_bulk, mut_parent and bg_parent are hashes keyed by position
# whose values respond to #var_base_frac (a hash of base => fraction, with
# :ref for the reference base). parent_hemi maps a position to the ratio
# computed from the parents.
class ContigPileups

  include Enumerable
  extend Forwardable
  # These names can only delegate to one target. They were declared for all
  # four pileup hashes in turn, and the last declaration (@bg_parent) is the
  # one that takes effect, so only that one is kept.
  def_delegators :@bg_parent, :each, :each_key, :each_value, :length, :[], :store
  attr_accessor :id, :parent_hemi
  attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent

  # fasta - identifier stored as this object's id
  def initialize(fasta)
    @id = fasta
    @mut_bulk = {}
    @bg_bulk = {}
    @mut_parent = {}
    @bg_parent = {}
    @parent_hemi = {}
  end

  # Classify every mutant bulk position.
  #
  # With Options.params.polyploidy set, positions present in parent_hemi get
  # a bulk frequency ratio from Bfr.get_bfr; all other positions go through
  # compare_pileup. Positions for which Bfr.get_bfr returns its '' placeholder
  # are not stored, so hemi_pos only ever holds numeric ratios.
  #
  # Returns [hm_pos, ht_pos, hemi_pos], each a hash keyed by position.
  def bulks_compared
    @hm_pos = {}
    @ht_pos = {}
    @hemi_pos = {}
    polyploidy = Options.params.polyploidy
    @mut_bulk.each_key do |pos|
      if polyploidy && @parent_hemi.key?(pos)
        bg_bases = @bg_bulk.key?(pos) ? @bg_bulk[pos].var_base_frac : ''
        bfr = Bfr.get_bfr(@mut_bulk[pos].var_base_frac, bg_bases)
        @hemi_pos[pos] = bfr unless bfr == ''
      else
        compare_pileup(pos)
      end
    end
    [@hm_pos, @ht_pos, @hemi_pos]
  end

  # Classify one mutant bulk position as :hom or :het and record it.
  #
  # The reference entry is dropped first; an empty hash (position below the
  # selected coverage, or all bases below noise) is ignored. When several
  # variant bases remain, the predominant one decides the type.
  def compare_pileup(pos)
    base_hash = @mut_bulk[pos].var_base_frac
    base_hash.delete(:ref)
    return nil if base_hash.empty?
    mut_type, ratio = var_mode(base_hash.values.max)
    mut_type = compare_var_type(mut_type, bg_bulk_var(pos)) if @bg_bulk.key?(pos)
    categorise_pos(mut_type, pos, ratio) unless mut_type.nil?
  end

  # if both bulks have homozygous var at this position
  # then ignore the position (returns nil), otherwise returns muttype
  def compare_var_type(muttype, bgtype)
    muttype == :hom && bgtype == :hom ? nil : muttype
  end

  # Variant type of the background bulk at pos.
  #
  # Only non-reference bases are considered, mirroring compare_pileup: a
  # background that is purely reference must not count as a homozygous
  # variant. Returns nil when no variant base remains, otherwise the mode
  # (:hom, :het or '') of the predominant variant base.
  def bg_bulk_var(pos)
    bg_base_hash = @bg_bulk[pos].var_base_frac.reject { |base, _frac| base == :ref }
    return nil if bg_base_hash.empty?
    # taking only var mode
    var_mode(bg_base_hash.values.max)[0]
  end

  # Store ratio under pos in hm_pos (:hom) or ht_pos (:het); any other
  # var_type is ignored.
  def categorise_pos(var_type, pos, ratio)
    if var_type == :hom
      @hm_pos[pos] = ratio
    elsif var_type == :het
      @ht_pos[pos] = ratio
    end
  end

  # calculate var zygosity for non-polyploid variants
  # increased range is used for heterozygosity for RNA-seq data
  #
  # Returns [mode, ratio] where mode is :het when ratio lies within
  # Options.params.htlow..hthigh, :hom when above hthigh, and '' otherwise.
  def var_mode(ratio)
    params = Options.params
    mode = ''
    if ratio.between?(params.htlow, params.hthigh)
      mode = :het
    elsif ratio > params.hthigh
      mode = :hom
    end
    [mode, ratio]
  end

  # Fill parent_hemi with a ratio for every position seen in either parent.
  #
  # Positions shared by both parents are removed from bg_parent once they
  # have been processed, so the second pass only sees positions unique to
  # the background parent.
  def hemisnps_in_parent
    # mark all the hemi snp based on both parents
    @mut_parent.each_key do |pos|
      mut_parent_frac = @mut_parent[pos].var_base_frac
      if @bg_parent.key?(pos)
        @parent_hemi[pos] = Bfr.get_bfr(mut_parent_frac, @bg_parent[pos].var_base_frac)
        @bg_parent.delete(pos)
      else
        @parent_hemi[pos] = Bfr.get_bfr(mut_parent_frac)
      end
    end

    # now include all hemi snp unique to background parent
    @bg_parent.each_key do |pos|
      next if @parent_hemi.key?(pos)
      @parent_hemi[pos] = Bfr.get_bfr(@bg_parent[pos].var_base_frac)
    end
  end

end
|
142
|
+
|
143
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module Cheripic
|
4
|
+
|
5
|
+
class ImplementerError < CheripicError; end
|
6
|
+
|
7
|
+
# Drives one analysis: splits the user inputs into file options and analysis
# settings, extracts variants and writes the selected variants to the output
# directory.
class Implementer

  require 'ostruct'
  require 'fileutils'
  attr_accessor :options, :variants

  # input keys kept on this object as @options
  INPUT_KEYS = %i[assembly input_format mut_bulk bg_bulk output
                  mut_parent bg_parent].freeze

  # input keys forwarded to Options.update
  SETTING_KEYS = %i[hmes_adjust htlow hthigh mindepth min_non_ref_count
                    min_indel_count_support ignore_reference_n mapping_quality
                    base_quality noise cross_type only_frag_with_vars
                    filter_out_low_hmes polyploidy bfr_adjust].freeze

  # inputs - hash of user supplied values keyed by symbol; keys outside the
  #          two lists above are ignored
  #
  # Creates the output directory (and missing parents) as a side effect.
  def initialize(inputs)
    @options = OpenStruct.new(inputs.select { |key, _value| INPUT_KEYS.include?(key) })
    Options.update(inputs.select { |key, _value| SETTING_KEYS.include?(key) })
    FileUtils.mkdir_p @options.output
  end

  # Build the Variants object from the options and compare the pileups.
  def extract_vars
    @variants = Variants.new(@options)
    @variants.compare_pileups
  end

  # Write selected_variants.txt into the output directory: one header line,
  # then one tab separated line for every hm_pos position of every fragment
  # returned by @variants.hmes_frags. Returns nil.
  def process_variants
    @variants.verify_bg_bulk_pileup
    # print selected variants that could be potential markers or mutation
    # block form closes the file even when a lookup below raises
    File.open("#{@options.output}/selected_variants.txt", 'w') do |out_file|
      out_file.puts "HME_Score\tAlleleFreq\tseq_id\tposition\tref_base\tcoverage\tbases\tbase_quals\tsequence_left\tAlt_seq\tsequence_right"
      regions = Regions.new(@options.assembly)
      @variants.hmes_frags.each_key do |frag|
        contig_obj = @variants.assembly[frag]
        contig_obj.hm_pos.keys.each do |pos|
          pileup = @variants.pileups[frag].mut_bulk[pos]
          seqs = regions.fetch_seq(frag, pos)
          out_file.puts "#{contig_obj.hme_score}\t#{contig_obj.hm_pos[pos]}\t#{pileup.to_s.chomp}\t#{seqs[0]}\t#{pileup.consensus}\t#{seqs[1]}"
        end
      end
    end
    nil
  end

  # Run the analysis. Variants are extracted only when no variants object
  # has been set yet. Returns @variants.bfr_frags when
  # Options.params.polyploidy is set, nil otherwise.
  def run
    extract_vars if @variants.nil?
    process_variants
    @variants.bfr_frags if Options.params.polyploidy
  end

end
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module Cheripic
|
4
|
+
|
5
|
+
# Process-wide analysis settings.
#
# Settings start out as the defaults below, are changed through
# Options.update and are read through Options.params.
class Options

  require 'ostruct'

  @defaults = {
    :hmes_adjust => 0.5,
    :htlow => 0.2,
    :hthigh => 0.9,
    :mindepth => 6,
    :min_non_ref_count => 3,
    :min_indel_count_support => 3,
    :ignore_reference_n => true,
    :mapping_quality => 20,
    :base_quality => 15,
    :noise => 0.1,
    :cross_type => 'back',
    :only_frag_with_vars => true,
    :filter_out_low_hmes => true,
    :polyploidy => false,
    :bfr_adjust => 0.05,
    :sel_seq_len => 50
  }

  # Merge newset into the current settings (existing keys are overwritten,
  # the change persists for later calls) and return the refreshed params.
  def self.update(newset)
    @defaults.merge!(newset)
    # drop the cached view so the next read reflects the new values
    @params = nil
    params
  end

  # Current settings as an OpenStruct.
  #
  # The struct is built once per update and shared between callers, because
  # it is read for every position inside the analysis loops; treat it as
  # read-only and use Options.update to change a value.
  def self.params
    @params ||= OpenStruct.new(@defaults)
  end

end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'bio-samtools'
|
4
|
+
require 'bio/db/pileup'
|
5
|
+
|
6
|
+
# A single pileup line with variant calling helpers on top of
# Bio::DB::Pileup.
#
# Thresholds live in the defaults hash (see set_defaults) and can be
# overridden per instance through the opts argument of the constructor.
class Pileup < Bio::DB::Pileup

  attr_accessor :defaults

  # string - one pileup line, handed to Bio::DB::Pileup
  # opts   - hash of threshold overrides, merged over the defaults
  def initialize(string, opts={})
    super(string)
    set_defaults(opts)
    adj_read_bases
    # characters accepted as the first base of an indel sequence
    @indelbases = 'acgtryswkmbdhvnACGTRYSWKMBDHVN'
  end

  # Set the thresholds, then apply the overrides in opts.
  # merge! is required here: a plain merge returns a new hash and would
  # leave the defaults untouched, discarding the caller's options.
  def set_defaults(opts)
    @defaults = {
      noise: 0.1, # noise level for read depth
      ht_low: 0.2, # min allele freq for heterozygosity
      ht_high: 0.9, # max allele freq for heterozygosity
      min_depth: 6, # minimum coverage for variant
      min_non_ref_count: 3,
      ignore_reference_n: true,
      min_indel_count_support: 3,
    }
    @defaults.merge!(opts)
  end

  # Strip markers from read_bases (in place) that would disturb counting:
  # '^' plus the character following it (mapping quality), '$' (read end)
  # and '*'. Removing the '^x' pairs also prevents a '+' or '-' used as a
  # mapping quality character from being mistaken for an indel.
  def adj_read_bases
    self.read_bases.gsub!(/\^./, '')
    self.read_bases.delete! '$'
    self.read_bases.delete! '*'
    # warn about reads with ambiguous codes
    # if self.read_bases.match(/[^atgcATGC,\.\+\-0-9]/)
    #   warn "Ambiguous nucleotide\t#{self.read_bases}"
    # end
  end

  # count bases matching reference and non-reference
  # from snp variant and make a hash of bases with counts
  # for indels the indel sequences are removed before counting
  #
  # Returns a hash with keys :ref, :A, :C, :G, :T, optionally :indel, and
  # :cov. Insertions take precedence when read_bases holds both '+' and '-'.
  def bases_hash
    if self.read_bases =~ /\+/
      counts = indels_to_hash('+')
    elsif self.read_bases =~ /\-/
      counts = indels_to_hash('-')
    else
      counts = snp_base_hash(self.read_bases)
    end
    # some indels will have ref base in the read and using
    # sum of hash values is going to give wrong additional coverage
    # from indels so including actual coverage from pileup
    counts[:cov] = self.coverage
    counts
  end

  # count bases from indels
  # read_bases is split at the delimiter (+ or -)
  # and the number after each delimiter is summed
  def count_indel_bases(delimiter)
    segments = self.read_bases.split(delimiter)
    segments.shift
    # deletions in reference could contain ambiguous codes,
    segments.inject(0) { |total, segment| total + leading_indel_size(segment) }
  end

  # Number of reads carrying a non-reference base or an indel.
  # Insertions take precedence when read_bases holds both '+' and '-'.
  def non_ref_count
    read_bases = self.read_bases
    if read_bases =~ /\+/
      indel_non_ref_count('+')
    elsif read_bases =~ /\-/
      indel_non_ref_count('-')
    else
      read_bases.count('atgcATGC')
    end
  end

  # check if the pileup has the parameters we are looking for
  #
  # Returns false when the reference base is '*', or is N while
  # ignore_reference_n is set; otherwise true only when coverage reaches
  # min_depth and non_ref_count reaches min_non_ref_count.
  def is_var
    return false if self.ref_base == '*'
    return false if @defaults[:ignore_reference_n] and self.ref_base =~ /^[nN]$/
    self.coverage >= @defaults[:min_depth] && self.non_ref_count >= @defaults[:min_non_ref_count]
  end

  # ratio of non_ref_count to coverage
  def non_ref_ratio
    self.non_ref_count.to_f / self.coverage.to_f
  end

  # calculate var zygosity for non-polyploid variants
  # increased range is used for heterozygosity for RNA-seq data
  #
  # Returns :het when non_ref_ratio lies within ht_low..ht_high, :hom when
  # it is above ht_high and '' otherwise.
  def var_mode
    ratio = self.non_ref_ratio
    if ratio.between?(@defaults[:ht_low], @defaults[:ht_high])
      :het
    elsif ratio > @defaults[:ht_high]
      :hom
    else
      ''
    end
  end

  # form hash of base information, [ATGC] counts for snp
  # a hash of base proportion is calculated
  # base proportion hash below a selected depth is empty
  # base proportion below or equal to a noise factor are discarded
  def var_base_frac
    counts = self.bases_hash
    snp_hash = {}
    coverage = counts[:cov]
    # zero coverage is rejected as well, it would only produce NaN fractions
    return snp_hash if coverage < @defaults[:min_depth] || coverage.zero?
    # calculate proportion of each base in coverage
    counts.each_key do |base|
      next if base == :cov
      freq = counts[base].to_f / coverage.to_f
      next if freq <= @defaults[:noise]
      snp_hash[base] = freq
    end
    snp_hash
  end


  private

  # count number of indels and number non-indel base
  # and return a hash with bases and indel counts
  #
  # The :indel key is only set when the number of delimiters reaches
  # min_indel_count_support.
  def indels_to_hash(delimiter)
    non_indel_bases = String.new
    segments = self.read_bases.split(delimiter)
    # to_s: split yields nothing when read_bases is just the delimiter
    non_indel_bases << segments.shift.to_s
    segments.each do |segment|
      # get number of nucleotides inserted or deleted
      number = leading_indel_size(segment)
      # capture remaining nucleotides
      non_indel_bases << segment.gsub(/^#{number}\w{#{number}}/, '')
    end
    counts = snp_base_hash(non_indel_bases)
    # check enough reads are supporting indel
    indel_count = self.read_bases.count(delimiter)
    if indel_count >= @defaults[:min_indel_count_support]
      counts[:indel] = indel_count
    end
    counts
  end

  # Count reference matches ('.' and ',') and each of A, C, G, T
  # (either case) in readbases.
  def snp_base_hash(readbases)
    {
      ref: readbases.count('.,'),
      A: readbases.count('aA'),
      C: readbases.count('cC'),
      G: readbases.count('gG'),
      T: readbases.count('tT'),
      # N: readbases.count('nN')
    }
  end

  # Non-reference read count when indels are present: every base letter
  # plus one per indel, minus the letters that belong to indel sequences.
  def indel_non_ref_count(delimitter)
    read_bases = self.read_bases
    base_letters = read_bases.count(@indelbases)
    indelcounts = read_bases.count(delimitter)
    base_letters + indelcounts - count_indel_bases(delimitter)
  end

  # Length announced at the start of an indel segment, e.g. 2 for "2AT,,".
  # The pattern only depends on @indelbases, so it is built once.
  def leading_indel_size(segment)
    @indel_size_pattern ||= /^(\d+)[#{@indelbases}]/
    @indel_size_pattern.match(segment)[1].to_i
  end

end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio-samtools'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class RegionsError < CheripicError; end
|
8
|
+
|
9
|
+
# Access to the assembly fasta file for fetching the sequence flanking a
# position. Enumerable and the delegated hash methods work on id_len, a hash
# of sequence id => sequence length.
class Regions

  include Enumerable
  extend Forwardable
  def_delegators :@id_len, :each, :each_key, :each_value, :length, :[]
  attr_accessor :reference_db, :id_len

  # assembly - path of the fasta file
  def initialize(assembly)
    @reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>assembly})
    @id_len = {}
    self.get_id_len
  end

  # Load the fasta index and record the length of every entry in id_len.
  def get_id_len
    @reference_db.load_fai_entries
    @reference_db.index.entries.each_entry do |entry|
      @id_len[entry.id] = entry.length
    end
  end

  # for each id and position returns left and right sequence
  # of pre-selected length (Options.params.sel_seq_len + 1 bases per side)
  #
  # Region coordinates are 1-based and inclusive, so the window is clamped
  # to 1..length of the sequence. A side with no bases left (pos on the
  # first or last base) is returned as '' instead of requesting an empty or
  # inverted region.
  #
  # Returns [seq_left, seq_right].
  def fetch_seq(id, pos)
    limit = Options.params.sel_seq_len + 1
    len = @id_len[id]
    low = [pos - limit, 1].max
    high = [pos + limit, len].min
    seq_left = pos > 1 ? fetch_region(id, low, pos - 1) : ''
    seq_right = pos < len ? fetch_region(id, pos + 1, high) : ''
    [seq_left, seq_right]
  end

  private

  # Fetch the inclusive range first..last of sequence id from the fasta file.
  def fetch_region(id, first, last)
    region = Bio::DB::Fasta::Region.parse_region("#{id}:#{first}-#{last}")
    @reference_db.fetch_sequence(region)
  end

end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,201 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class VariantsError < CheripicError; end
|
8
|
+
|
9
|
+
class Variants
|
10
|
+
|
11
|
+
include Enumerable
|
12
|
+
extend Forwardable
|
13
|
+
def_delegators :@assembly, :each, :each_key, :each_value, :size, :length, :[]
|
14
|
+
attr_accessor :assembly, :has_run, :pileups, :hmes_frags, :bfr_frags
|
15
|
+
|
16
|
+
def initialize(options)
|
17
|
+
@params = options
|
18
|
+
@assembly = {}
|
19
|
+
@pileups = {}
|
20
|
+
Bio::FastaFormat.open(@params.assembly).each do |entry|
|
21
|
+
if entry.seq.length == 0
|
22
|
+
logger.error "No sequence found for entry #{entry.entry_id}"
|
23
|
+
raise VariantsError
|
24
|
+
end
|
25
|
+
contig = Contig.new(entry)
|
26
|
+
if @assembly.key?(contig.id)
|
27
|
+
logger.error "fasta id already found in the file for #{contig.id}"
|
28
|
+
logger.error 'make sure there are no duplicate entries in the fasta file'
|
29
|
+
raise VariantsError
|
30
|
+
end
|
31
|
+
@assembly[contig.id] = contig
|
32
|
+
@pileups[contig.id] = ContigPileups.new(contig.id)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# Read and store pileup data for each bulk and parents
|
37
|
+
#
|
38
|
+
def analyse_pileups
|
39
|
+
@bg_bulk = @params.bg_bulk
|
40
|
+
@mut_parent = @params.mut_parent
|
41
|
+
@bg_parent = @params.bg_parent
|
42
|
+
|
43
|
+
%i{mut_bulk bg_bulk mut_parent bg_parent}.each do | input |
|
44
|
+
infile = @params[input]
|
45
|
+
if infile != ''
|
46
|
+
extract_pileup(infile, input)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
@has_run = true
|
51
|
+
end
|
52
|
+
|
53
|
+
def extract_pileup(pileupfile, sym)
|
54
|
+
# read mpileup file and process each variant
|
55
|
+
File.foreach(pileupfile) do |line|
|
56
|
+
pileup = Pileup.new(line)
|
57
|
+
if pileup.is_var
|
58
|
+
contig_obj = @pileups[pileup.ref_name]
|
59
|
+
contig_obj.send(sym).store(pileup.pos, pileup)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def compare_pileups
|
65
|
+
unless defined?(@has_run)
|
66
|
+
self.analyse_pileups
|
67
|
+
end
|
68
|
+
@assembly.each_key do | id |
|
69
|
+
contig = @assembly[id]
|
70
|
+
# extract parental hemi snps for polyploids before bulks are compared
|
71
|
+
if @mut_parent != '' or @bg_parent != ''
|
72
|
+
@pileups[id].hemisnps_in_parent
|
73
|
+
end
|
74
|
+
contig.hm_pos, contig.ht_pos, contig.hemi_pos = @pileups[id].bulks_compared
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# Contigs selected by their hme_score.
#
# Deliberately not memoised: the selection is recomputed and stored in
# @hmes_frags on every call.
def hmes_frags
  fresh_selection = select_contigs(:hme_score)
  @hmes_frags = fresh_selection
end
|
82
|
+
|
83
|
+
# Contigs selected by their bfr_score.
#
# Memoised: the selection is computed on the first call only and the stored
# @bfr_frags value is returned afterwards.
def bfr_frags
  return @bfr_frags if defined?(@bfr_frags)

  @bfr_frags = select_contigs(:bfr_score)
end
|
89
|
+
|
90
|
+
# Picks the contigs of the assembly to keep for the given score type.
#
# ratio_type - :hme_score or :bfr_score (any value other than :hme_score is
#              treated as :bfr_score)
#
# When the only_frag_with_vars option is on, a contig is pre-selected if
#   * :hme_score - hm_num + ht_num is greater than twice hmes_adjust
#   * otherwise  - it has at least one hemi position (hemi_num > 0)
# When the option is off, every contig is pre-selected. The pre-selection is
# then passed through filter_contigs and the outcome is logged.
#
# Returns the hash of selected contigs keyed by fragment id.
def select_contigs(ratio_type)
  selected_contigs = {}
  only_frag_with_vars = Options.params.only_frag_with_vars

  @assembly.each_key do |frag|
    contig = @assembly[frag]
    keep =
      if !only_frag_with_vars
        true
      elsif ratio_type == :hme_score
        # fragments which have a variant
        contig.hm_num + contig.ht_num > 2 * Options.params.hmes_adjust
      else
        # polyploidy scenario: fragments with at least one bfr position
        contig.hemi_num > 0
      end
    selected_contigs[frag] = contig if keep
  end

  selected_contigs = filter_contigs(selected_contigs, ratio_type)
  message =
    if only_frag_with_vars
      "Selected #{selected_contigs.length} out of #{@assembly.length} fragments with #{ratio_type} score\n"
    else
      "No filtering was applied to fragments\n"
    end
  logger.info message
  selected_contigs
end
|
118
|
+
|
119
|
+
# Removes the contigs whose score is below the cutoff for the score type.
#
# selected_contigs - hash of fragment id => contig; modified in place
# ratio_type       - name of the score method to call on each contig
#                    (:hme_score or :bfr_score)
#
# The cutoff comes from get_cutoff. Entries are removed with Hash#delete_if
# instead of deleting keys from inside an each_key loop, so the hash is not
# modified by hand while it is being iterated.
#
# Returns the same (now filtered) hash object.
def filter_contigs(selected_contigs, ratio_type)
  cutoff = get_cutoff(selected_contigs, ratio_type)
  selected_contigs.delete_if { |_frag, contig| contig.send(ratio_type) < cutoff }
end
|
128
|
+
|
129
|
+
# Minimum score a contig needs in order to be kept by filter_contigs.
#
# selected_contigs - hash of fragment id => contig; only used for the
#                    bfr_score cutoff
# ratio_type       - :hme_score, anything else is handled as :bfr_score
#
# With the filter_out_low_hmes option off the cutoff is 0.0. Otherwise:
#   * :hme_score - (1.0 / hmes_adjust) + 1.0 for a 'back' cross_type and
#                  (2.0 / hmes_adjust) + 1.0 for any other cross type
#                  (outcross)
#   * bfr_score  - the value returned by bfr_cutoff for the given contigs
def get_cutoff(selected_contigs, ratio_type)
  return 0.0 unless Options.params.filter_out_low_hmes
  return bfr_cutoff(selected_contigs) unless ratio_type == :hme_score

  adjust = Options.params.hmes_adjust
  numerator = Options.params.cross_type == 'back' ? 1.0 : 2.0
  (numerator / adjust) + 1.0
end
|
150
|
+
|
151
|
+
# bfr_score found at the top +prop+ percent of the given contigs.
#
# selected_contigs - hash of fragment id => contig responding to bfr_score
# prop             - percentage of the contigs (it is divided by 100) that
#                    locates the cutoff in the descending score list
#
# The scores are ranked from highest to lowest and the rank is
# length * prop / 100, raised to 1 when smaller so that at least one contig
# passes. Returns the score at that rank, or nil when the rank lies outside
# the list (for example when no contigs are given).
def bfr_cutoff(selected_contigs, prop=0.1)
  ranked = selected_contigs.each_key.map { |frag| selected_contigs[frag].bfr_score }
  ranked = ranked.sort.reverse

  rank = (ranked.length * prop) / 100
  # set a minimum rank to get at least one contig
  rank = 1 if rank < 1
  ranked[rank - 1]
end
|
164
|
+
|
165
|
+
# method is to discard homozygous variant positions for which background bulk
|
166
|
+
# pileup shows proportion higher than 0.35 for variant allele/non-reference allele
|
167
|
+
# a recessive variant is expected to have 1/3rd frequency in background bulk
|
168
|
+
# Re-checks the homozygous positions of every contig in @hmes_frags against
# the stored mutant and background bulk pileups, then recomputes hmes_frags.
#
# hmes_frags is run first when @hmes_frags is not defined yet. A position is
# removed from the contig's hm_pos when
#   * the mutant bulk has no pileup at that position,
#   * the mutant bulk pileup is not flagged as a variant, or
#   * the background bulk has a pileup there whose non_ref_ratio is above 0.35.
# The first two cases are not expected to happen.
#
# Returns the freshly recalculated result of hmes_frags.
def verify_bg_bulk_pileup
  hmes_frags unless defined?(@hmes_frags)

  @hmes_frags.each_key do |frag|
    contig = @assembly[frag]
    contig_pileups = @pileups[frag]
    # iterate over a snapshot of the keys because entries are deleted below
    contig.hm_pos.keys.each do |pos|
      discard =
        if !contig_pileups.mut_bulk.key?(pos)
          # this should not happen, maybe catch it as an error
          true
        elsif !contig_pileups.mut_bulk[pos].is_var
          # this should not happen, maybe catch it as an error
          true
        else
          contig_pileups.bg_bulk.key?(pos) &&
            contig_pileups.bg_bulk[pos].non_ref_ratio > 0.35
        end
      @assembly[frag].hm_pos.delete(pos) if discard
    end
  end

  # recalculate hmes_frags once pileups are verified
  hmes_frags
end
|
198
|
+
|
199
|
+
end # Variants
|
200
|
+
|
201
|
+
end # Cheripic
|
data/lib/cheripic/version.rb
CHANGED
data/lib/cheripic.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'cheripic/cmd'
|
2
|
-
require 'cheripic/version'
|
3
1
|
|
4
2
|
# set up a global logger object to access across module
|
5
3
|
require 'yell'
|
@@ -23,3 +21,14 @@ module Cheripic
|
|
23
21
|
Object.send :include, Yell::Loggable
|
24
22
|
|
25
23
|
end # Cheripic
|
24
|
+
|
25
|
+
require 'cheripic/cmd'
|
26
|
+
require 'cheripic/version'
|
27
|
+
require 'cheripic/implementer'
|
28
|
+
require 'cheripic/variants'
|
29
|
+
require 'cheripic/contig'
|
30
|
+
require 'cheripic/pileup'
|
31
|
+
require 'cheripic/options'
|
32
|
+
require 'cheripic/contig_pileups'
|
33
|
+
require 'cheripic/bfr'
|
34
|
+
require 'cheripic/regions'
|
metadata
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cheripic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shyam Rallapalli
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: yell
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
- - ">="
|
18
21
|
- !ruby/object:Gem::Version
|
19
22
|
version: 2.0.5
|
20
23
|
type: :runtime
|
@@ -22,6 +25,9 @@ dependencies:
|
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '2.0'
|
30
|
+
- - ">="
|
25
31
|
- !ruby/object:Gem::Version
|
26
32
|
version: 2.0.5
|
27
33
|
- !ruby/object:Gem::Dependency
|
@@ -29,6 +35,9 @@ dependencies:
|
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
37
|
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '2.1'
|
40
|
+
- - ">="
|
32
41
|
- !ruby/object:Gem::Version
|
33
42
|
version: 2.1.2
|
34
43
|
type: :runtime
|
@@ -36,6 +45,9 @@ dependencies:
|
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
38
47
|
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '2.1'
|
50
|
+
- - ">="
|
39
51
|
- !ruby/object:Gem::Version
|
40
52
|
version: 2.1.2
|
41
53
|
- !ruby/object:Gem::Dependency
|
@@ -43,6 +55,9 @@ dependencies:
|
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
44
56
|
requirements:
|
45
57
|
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '1.5'
|
60
|
+
- - ">="
|
46
61
|
- !ruby/object:Gem::Version
|
47
62
|
version: 1.5.0
|
48
63
|
type: :runtime
|
@@ -50,13 +65,33 @@ dependencies:
|
|
50
65
|
version_requirements: !ruby/object:Gem::Requirement
|
51
66
|
requirements:
|
52
67
|
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '1.5'
|
70
|
+
- - ">="
|
53
71
|
- !ruby/object:Gem::Version
|
54
72
|
version: 1.5.0
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: bio-gngm
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 0.2.1
|
80
|
+
type: :runtime
|
81
|
+
prerelease: false
|
82
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - "~>"
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 0.2.1
|
55
87
|
- !ruby/object:Gem::Dependency
|
56
88
|
name: rinruby
|
57
89
|
requirement: !ruby/object:Gem::Requirement
|
58
90
|
requirements:
|
59
91
|
- - "~>"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '2.0'
|
94
|
+
- - ">="
|
60
95
|
- !ruby/object:Gem::Version
|
61
96
|
version: 2.0.3
|
62
97
|
type: :runtime
|
@@ -64,8 +99,25 @@ dependencies:
|
|
64
99
|
version_requirements: !ruby/object:Gem::Requirement
|
65
100
|
requirements:
|
66
101
|
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '2.0'
|
104
|
+
- - ">="
|
67
105
|
- !ruby/object:Gem::Version
|
68
106
|
version: 2.0.3
|
107
|
+
- !ruby/object:Gem::Dependency
|
108
|
+
name: activesupport
|
109
|
+
requirement: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - "~>"
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: 4.2.6
|
114
|
+
type: :development
|
115
|
+
prerelease: false
|
116
|
+
version_requirements: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - "~>"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: 4.2.6
|
69
121
|
- !ruby/object:Gem::Dependency
|
70
122
|
name: bundler
|
71
123
|
requirement: !ruby/object:Gem::Requirement
|
@@ -126,6 +178,9 @@ dependencies:
|
|
126
178
|
name: simplecov
|
127
179
|
requirement: !ruby/object:Gem::Requirement
|
128
180
|
requirements:
|
181
|
+
- - "~>"
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
version: '0.8'
|
129
184
|
- - ">="
|
130
185
|
- !ruby/object:Gem::Version
|
131
186
|
version: 0.8.2
|
@@ -133,6 +188,9 @@ dependencies:
|
|
133
188
|
prerelease: false
|
134
189
|
version_requirements: !ruby/object:Gem::Requirement
|
135
190
|
requirements:
|
191
|
+
- - "~>"
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0.8'
|
136
194
|
- - ">="
|
137
195
|
- !ruby/object:Gem::Version
|
138
196
|
version: 0.8.2
|
@@ -140,6 +198,9 @@ dependencies:
|
|
140
198
|
name: shoulda
|
141
199
|
requirement: !ruby/object:Gem::Requirement
|
142
200
|
requirements:
|
201
|
+
- - "~>"
|
202
|
+
- !ruby/object:Gem::Version
|
203
|
+
version: '3.5'
|
143
204
|
- - ">="
|
144
205
|
- !ruby/object:Gem::Version
|
145
206
|
version: 3.5.0
|
@@ -147,6 +208,9 @@ dependencies:
|
|
147
208
|
prerelease: false
|
148
209
|
version_requirements: !ruby/object:Gem::Requirement
|
149
210
|
requirements:
|
211
|
+
- - "~>"
|
212
|
+
- !ruby/object:Gem::Version
|
213
|
+
version: '3.5'
|
150
214
|
- - ">="
|
151
215
|
- !ruby/object:Gem::Version
|
152
216
|
version: 3.5.0
|
@@ -154,6 +218,9 @@ dependencies:
|
|
154
218
|
name: coveralls
|
155
219
|
requirement: !ruby/object:Gem::Requirement
|
156
220
|
requirements:
|
221
|
+
- - "~>"
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: '0.7'
|
157
224
|
- - ">="
|
158
225
|
- !ruby/object:Gem::Version
|
159
226
|
version: 0.7.2
|
@@ -161,6 +228,9 @@ dependencies:
|
|
161
228
|
prerelease: false
|
162
229
|
version_requirements: !ruby/object:Gem::Requirement
|
163
230
|
requirements:
|
231
|
+
- - "~>"
|
232
|
+
- !ruby/object:Gem::Version
|
233
|
+
version: '0.7'
|
164
234
|
- - ">="
|
165
235
|
- !ruby/object:Gem::Version
|
166
236
|
version: 0.7.2
|
@@ -183,7 +253,15 @@ files:
|
|
183
253
|
- bin/setup
|
184
254
|
- cheripic.gemspec
|
185
255
|
- lib/cheripic.rb
|
256
|
+
- lib/cheripic/bfr.rb
|
186
257
|
- lib/cheripic/cmd.rb
|
258
|
+
- lib/cheripic/contig.rb
|
259
|
+
- lib/cheripic/contig_pileups.rb
|
260
|
+
- lib/cheripic/implementer.rb
|
261
|
+
- lib/cheripic/options.rb
|
262
|
+
- lib/cheripic/pileup.rb
|
263
|
+
- lib/cheripic/regions.rb
|
264
|
+
- lib/cheripic/variants.rb
|
187
265
|
- lib/cheripic/version.rb
|
188
266
|
homepage: https://github.com/shyamrallapalli/cheripic
|
189
267
|
licenses:
|