cheripic 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/README.md +9 -4
- data/cheripic.gemspec +9 -8
- data/lib/cheripic/bfr.rb +84 -0
- data/lib/cheripic/cmd.rb +26 -33
- data/lib/cheripic/contig.rb +66 -0
- data/lib/cheripic/contig_pileups.rb +143 -0
- data/lib/cheripic/implementer.rb +80 -0
- data/lib/cheripic/options.rb +42 -0
- data/lib/cheripic/pileup.rb +186 -0
- data/lib/cheripic/regions.rb +46 -0
- data/lib/cheripic/variants.rb +201 -0
- data/lib/cheripic/version.rb +1 -1
- data/lib/cheripic.rb +11 -2
- metadata +80 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ace583d5063ea92f69faa4430b71f0bb0f654528
|
4
|
+
data.tar.gz: 00ae530b7c5c162aa0e699cae2c5dcaa9d159673
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c179df9e44bdff364c8c9c7dd2a779609b05ed3d2f0ef5385c5c5ebb4910a98c35a05d861851dea3f34b14ab8d188994e6b7f254dde2b9356e73dbff08386cf0
|
7
|
+
data.tar.gz: f21ee021e4594bacaf319170746ad7655b0c579cb0c49495bf33dbef9dabd059f8d2e81edf44d64d24cea1f86a3e6315782d87fd8e475cf5698d35dce1bd3079
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
# Cheripic
|
2
2
|
|
3
|
+
[](https://badge.fury.io/rb/cheripic)
|
3
4
|
[](https://travis-ci.org/shyamrallapalli/cheripic)
|
4
5
|
[](https://coveralls.io/github/shyamrallapalli/cheripic?branch=master)
|
6
|
+
[](https://codeclimate.com/github/shyamrallapalli/cheripic)
|
5
7
|
|
6
8
|
|
7
|
-
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/cheripic`. To experiment with that code, run `bin/console` for an interactive prompt.
|
8
|
-
|
9
|
-
TODO: Delete this and the text above, and describe your gem
|
10
9
|
|
10
|
+
Computing Homozygosity Enriched Regions In genomes to Prioritize Identification of Candidate variants (CHERIPIC),
|
11
|
+
is a Ruby tool to pick causative mutations from bulk segregant sequencing.
|
12
|
+
|
13
|
+
This gem is currently still in development and is nearing a complete working package.
|
14
|
+
|
15
|
+
|
11
16
|
## Installation
|
12
17
|
|
13
18
|
Add this line to your application's Gemfile:
|
@@ -36,7 +41,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
36
41
|
|
37
42
|
## Contributing
|
38
43
|
|
39
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
44
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/shyamrallapalli/cheripic. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
40
45
|
|
41
46
|
|
42
47
|
## License
|
data/cheripic.gemspec
CHANGED
@@ -19,18 +19,19 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.
|
23
|
-
spec.
|
24
|
-
spec.
|
22
|
+
spec.add_runtime_dependency 'yell', '~> 2.0', '>= 2.0.5'
|
23
|
+
spec.add_runtime_dependency 'trollop', '~> 2.1', '>= 2.1.2'
|
24
|
+
spec.add_runtime_dependency 'bio', '~> 1.5', '>= 1.5.0'
|
25
25
|
# spec.add_dependency 'bio-samtools', '~> 2.3.3'
|
26
|
-
|
27
|
-
spec.
|
26
|
+
spec.add_dependency 'bio-gngm', '~> 0.2.1'
|
27
|
+
spec.add_runtime_dependency 'rinruby', '~> 2.0', '>= 2.0.3'
|
28
28
|
|
29
|
+
spec.add_development_dependency 'activesupport', '~> 4.2.6'
|
29
30
|
spec.add_development_dependency 'bundler', '~> 1.10'
|
30
31
|
spec.add_development_dependency 'rake', '~> 10.0'
|
31
32
|
spec.add_development_dependency 'minitest'
|
32
33
|
spec.add_development_dependency 'minitest-reporters', '>= 1.0.17'
|
33
|
-
spec.add_development_dependency 'simplecov', '>= 0.8.2'
|
34
|
-
spec.add_development_dependency 'shoulda', '>= 3.5.0'
|
35
|
-
spec.add_development_dependency 'coveralls', '>= 0.7.2'
|
34
|
+
spec.add_development_dependency 'simplecov', '~> 0.8', '>= 0.8.2'
|
35
|
+
spec.add_development_dependency 'shoulda', '~> 3.5', '>= 3.5.0'
|
36
|
+
spec.add_development_dependency 'coveralls', '~> 0.7', '>= 0.7.2'
|
36
37
|
end
|
data/lib/cheripic/bfr.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Cheripic
|
4
|
+
|
5
|
+
class BfrError < CheripicError; end
|
6
|
+
|
7
|
+
class Bfr

  # Instance-level accessor kept for backward compatibility. The class
  # methods below keep their adjustment in a class-level instance variable.
  attr_accessor :bfr_adj

  # Bulk frequency ratio (bfr) for a marked hemi snp position.
  #
  # mut_hash and bg_hash map allele symbols (including :ref) to numeric
  # values; bg_hash defaults to '' meaning "no second sample available".
  # Positions with complex variants (anything other than "ref plus one
  # allele" or "a single non-ref allele") are ignored and yield ''.
  #
  # Returns a Float ratio, or '' for complex/unsupported positions.
  # The hashes passed in are not modified.
  def self.get_bfr(mut_hash, bg_hash='')
    # refreshed on every call so later Options.update changes are honoured
    @bfr_adj = Options.params.bfr_adjust
    if bg_hash != ''
      # two entries including :ref, or a lone non-ref entry, suggests
      # enrichment for one of two alleles
      if ref_and_one_allele?(mut_hash)
        calculate_bfr(mut_hash, bg_hash)
      elsif ref_and_one_allele?(bg_hash)
        calculate_bfr(bg_hash, mut_hash)
      elsif lone_allele?(mut_hash)
        calculate_bfr(mut_hash, bg_hash)
      elsif lone_allele?(bg_hash)
        calculate_bfr(bg_hash, mut_hash)
      else # complex
        ''
      end
    elsif ref_and_one_allele?(mut_hash) || lone_allele?(mut_hash)
      calc_fraction(mut_hash)[0] / @bfr_adj
    else
      ''
    end
  end

  # Calculate bfr using the information of both samples.
  #
  # two_key_hash supplies the allele of interest (see calc_fraction);
  # other_hash is looked up for the same allele. A missing :ref or missing
  # allele in other_hash is treated as 0.
  #
  # Returns the larger of frac_1/frac_2 and frac_2/frac_1.
  def self.calculate_bfr(two_key_hash, other_hash)
    adj = bfr_adjustment
    frac_1, base = calc_fraction(two_key_hash)
    # :ref can be absent when it fell below the noise depth
    other_ref = other_hash.fetch(:ref, 0)
    other_base = other_hash.fetch(base, 0)
    frac_2 = (other_base + adj) / (other_base + other_ref + adj)
    # making sure ratio is always 1 or greater
    frac_1 > frac_2 ? frac_1 / frac_2 : frac_2 / frac_1
  end

  # Adjusted fraction of the non-ref allele among the first two entries
  # of hash (a missing :ref is treated as 0 and considered last).
  #
  # Returns [fraction, allele_key].
  def self.calc_fraction(hash)
    adj = bfr_adjustment
    # work on a copy so the caller's hash is left untouched
    counts = hash.key?(:ref) ? hash : hash.merge(ref: 0)
    first, second = counts.keys
    sum = counts[first] + counts[second] + adj
    base = first == :ref ? second : first
    [(counts[base] + adj) / sum, base]
  end

  # Adjustment factor; loaded lazily so the helpers above can be called
  # without a preceding get_bfr call.
  def self.bfr_adjustment
    @bfr_adj ||= Options.params.bfr_adjust
  end

  # true when hash holds exactly :ref plus one other entry
  def self.ref_and_one_allele?(hash)
    hash.length == 2 && hash.key?(:ref)
  end

  # true when hash holds exactly one entry and no (non-nil) :ref value
  def self.lone_allele?(hash)
    hash.length == 1 && hash[:ref].nil?
  end

  private_class_method :bfr_adjustment, :ref_and_one_allele?, :lone_allele?

end
|
83
|
+
|
84
|
+
end
|
data/lib/cheripic/cmd.rb
CHANGED
@@ -8,13 +8,14 @@ module Cheripic
|
|
8
8
|
require 'pathname'
|
9
9
|
require 'ostruct'
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
attr_accessor :options
|
12
|
+
|
13
|
+
def initialize(args)
|
14
|
+
@options = parse_arguments(args)
|
14
15
|
check_arguments
|
15
16
|
end
|
16
17
|
|
17
|
-
def parse_arguments
|
18
|
+
def parse_arguments(args)
|
18
19
|
Trollop::with_standard_exception_handling argument_parser do
|
19
20
|
if args.empty? || args.include?('-h') || args.include?('--help')
|
20
21
|
raise Trollop::HelpNeeded
|
@@ -45,7 +46,7 @@ module Cheripic
|
|
45
46
|
opt :output, 'Directory to store results, will be created if not existing',
|
46
47
|
:default => 'cheripic_results'
|
47
48
|
opt :loglevel, 'Choose any one of "info / warn / debug" level for logs generated',
|
48
|
-
:default => '
|
49
|
+
:default => 'debug'
|
49
50
|
opt :hmes_adjust, 'factor added to snp count of each contig to adjust for hme score calculations',
|
50
51
|
:type => Float,
|
51
52
|
:default => 0.5
|
@@ -98,7 +99,7 @@ module Cheripic
|
|
98
99
|
:short => '-r',
|
99
100
|
:type => String,
|
100
101
|
:default => ''
|
101
|
-
opt :
|
102
|
+
opt :bfr_adjust, 'factor added to hemi snp frequency of each parent to adjust for bfr calculations',
|
102
103
|
:type => Float,
|
103
104
|
:default => 0.05
|
104
105
|
opt :examples, 'shows some example commands with explanation'
|
@@ -140,19 +141,20 @@ OPTIONS:
|
|
140
141
|
end
|
141
142
|
|
142
143
|
def check_arguments
|
143
|
-
|
144
|
-
|
145
|
-
|
144
|
+
check_output_dir
|
145
|
+
check_log_level
|
146
|
+
check_input_files
|
146
147
|
end
|
147
148
|
|
149
|
+
# TODO: check bulk input types and process associated files
|
148
150
|
# def check_input_types
|
149
|
-
# if @options
|
151
|
+
# if @options[:input_format] == 'vcf'
|
150
152
|
#
|
151
153
|
# end
|
152
154
|
# end
|
153
155
|
|
154
|
-
def
|
155
|
-
if @options
|
156
|
+
def check_input_files
|
157
|
+
if @options[:polyploidy]
|
156
158
|
inputfiles = %i{assembly mut_bulk bg_bulk mut_parent bg_parent}
|
157
159
|
else
|
158
160
|
inputfiles = %i{assembly mut_bulk bg_bulk}
|
@@ -171,35 +173,26 @@ OPTIONS:
|
|
171
173
|
end
|
172
174
|
end
|
173
175
|
|
174
|
-
def
|
175
|
-
if Dir.exist?(@options
|
176
|
-
raise CheripicArgError.new "#{@options
|
176
|
+
# Refuse to run when the requested output directory already exists.
#
# Reads @options[:output]. Raises CheripicArgError if that path is an
# existing directory; otherwise returns nil.
def check_output_dir
  return unless Dir.exist?(@options[:output])
  # the two message parts were previously joined without a separator
  raise CheripicArgError.new "#{@options[:output]} directory exists, " +
                             'please choose a different output directory name'
end
|
180
182
|
|
181
|
-
def
|
182
|
-
unless %w(error info warn debug).include?(@options
|
183
|
-
raise CheripicArgError.new "Loglevel #{@options
|
183
|
+
def check_log_level
|
184
|
+
unless %w(error info warn debug).include?(@options[:loglevel])
|
185
|
+
raise CheripicArgError.new "Loglevel #{@options[:loglevel]} is not valid. " +
|
184
186
|
'It must be one of: error, info, warn, debug.'
|
185
187
|
end
|
186
|
-
logger.level = Yell::Level.new @options
|
188
|
+
logger.level = Yell::Level.new @options[:loglevel].to_sym
|
187
189
|
end
|
188
190
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
# end
|
195
|
-
#
|
196
|
-
# def analyse_bulks
|
197
|
-
# assembly = @options.assembly
|
198
|
-
# logger.info "Loading assembly: #{assembly}"
|
199
|
-
# # a = Assembly.new assembly
|
200
|
-
# logger.info "Analysing assembly: #{assembly}"
|
201
|
-
#
|
202
|
-
# end
|
191
|
+
def run
|
192
|
+
@options[:output] = File.expand_path @options[:output]
|
193
|
+
analysis = Implementer.new(@options)
|
194
|
+
analysis.run
|
195
|
+
end
|
203
196
|
|
204
197
|
end # Cmd
|
205
198
|
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class ContigError < CheripicError; end
|
8
|
+
|
9
|
+
# Per-contig container for variant positions and the scores derived
# from them.
class Contig

  include Enumerable
  extend Forwardable
  # delegate [:size, :length] => :@contig
  # def_delegator :@contig, :entry_id, :id
  attr_accessor :hm_pos, :ht_pos, :hemi_pos, :id, :length

  # fasta must respond to entry_id and length; position hashes start empty.
  def initialize (fasta)
    @id = fasta.entry_id
    @length = fasta.length
    @hm_pos = {}
    @ht_pos = {}
    @hemi_pos = {}
  end

  # number of entries in hm_pos
  def hm_num
    hm_pos.length
  end

  # number of entries in ht_pos
  def ht_num
    ht_pos.length
  end

  # Ratio (hm_num + adjust) / (ht_num + adjust), where adjust is
  # Options.params.hmes_adjust. Returns 0.0 when the contig has neither
  # hm nor ht positions.
  def hme_score
    hmes_adjust = Options.params.hmes_adjust
    return 0.0 if hm_num == 0 && ht_num == 0
    (hm_num + hmes_adjust) / (ht_num + hmes_adjust)
  end

  # number of entries in hemi_pos
  def hemi_num
    hemi_pos.length
  end

  # Mean of the numeric values stored in hemi_pos (see geom_mean).
  # Non-numeric entries (e.g. the '' placeholder Bfr.get_bfr returns for
  # complex positions) are skipped; previously they made the summation
  # raise a TypeError. Returns 0.0 when no numeric value is available.
  def bfr_score
    scores = hemi_pos.values.select { |value| value.is_a?(Numeric) }
    return 0.0 if scores.empty?
    geom_mean(scores)
  end

  # NOTE: despite its name this currently returns the arithmetic mean of
  # the array; the geometric-mean implementation is kept commented out
  # below. Returns 0.0 for an empty array.
  def geom_mean(array)
    return 0.0 if array.empty?
    return array[0].to_f if array.length == 1
    array.reduce(:+) / array.size.to_f
    # sum = 0.0
    # array.each{ |v| sum += Math.log(v.to_f) }
    # sum /= array.size
    # Math.exp sum
  end

end # Contig
|
65
|
+
|
66
|
+
end # Cheripic
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class ContigPileupsError < CheripicError; end
|
8
|
+
|
9
|
+
# Pileup entries of one contig for the mutant/background bulks and parents,
# keyed by position, plus the comparison logic between them.
class ContigPileups

  include Enumerable
  extend Forwardable
  # The same delegators used to be declared four times (for @mut_bulk,
  # @bg_bulk, @mut_parent and @bg_parent); each declaration redefined the
  # same methods, so only the last one (@bg_parent) was ever in effect.
  # Only that effective declaration is kept.
  def_delegators :@bg_parent, :each, :each_key, :each_value, :length, :[], :store
  attr_accessor :id, :parent_hemi
  attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent

  # fasta is stored as the id of this container; all position hashes
  # start empty.
  def initialize (fasta)
    @id = fasta
    @mut_bulk = {}
    @bg_bulk = {}
    @mut_parent = {}
    @bg_parent = {}
    @parent_hemi = {}
  end

  # Walk every mut_bulk position and sort it into hm / ht / hemi hashes.
  # In polyploidy mode, positions marked in parent_hemi get a bfr value
  # (which may be '' for complex positions); all other positions are
  # classified via compare_pileup.
  #
  # Returns [hm_pos, ht_pos, hemi_pos].
  def bulks_compared
    @hm_pos = {}
    @ht_pos = {}
    @hemi_pos = {}
    # read once; Options.params builds a new object on every call
    polyploidy = Options.params.polyploidy
    @mut_bulk.each_key do | pos |
      if polyploidy && @parent_hemi.key?(pos)
        bg_bases = ''
        bg_bases = @bg_bulk[pos].var_base_frac if @bg_bulk.key?(pos)
        mut_bases = @mut_bulk[pos].var_base_frac
        @hemi_pos[pos] = Bfr.get_bfr(mut_bases, bg_bases)
      else
        compare_pileup(pos)
      end
    end
    [@hm_pos, @ht_pos, @hemi_pos]
  end

  # Classify a single mut_bulk position.
  # An empty hash (after removing :ref) results from a position below the
  # selected coverage or bases below noise and is skipped. For complex
  # loci the predominant base decides the variant type.
  def compare_pileup(pos)
    base_hash = @mut_bulk[pos].var_base_frac
    base_hash.delete(:ref)
    return nil if base_hash.empty?
    # for a single entry the maximum is that entry's value
    mut_type, ratio = var_mode(base_hash.values.max)
    if @bg_bulk.key?(pos)
      mut_type = compare_var_type(mut_type, bg_bulk_var(pos))
    end
    categorise_pos(mut_type, pos, ratio) unless mut_type.nil?
  end

  # if both bulks have homozygous var at this position
  # then ignore the position (nil), otherwise keep the mutant type
  def compare_var_type(muttype, bgtype)
    return nil if muttype == :hom && bgtype == :hom
    muttype
  end

  # Variant mode of the background bulk at pos, based on its predominant
  # non-reference base. Returns nil when no non-reference base is present.
  #
  # Fixes: the hash is keyed by symbols, so the former bg_base_hash[0]
  # lookup always produced nil and crashed in var_mode; and :ref is now
  # removed first (as compare_pileup does) so a reference-dominated
  # position is not mistaken for a homozygous variant.
  def bg_bulk_var(pos)
    bg_base_hash = @bg_bulk[pos].var_base_frac
    bg_base_hash.delete(:ref)
    return nil if bg_base_hash.empty?
    # taking only var mode
    var_mode(bg_base_hash.values.max)[0]
  end

  # store ratio under pos in the hm or ht hash; other types are dropped
  def categorise_pos(var_type, pos, ratio)
    if var_type == :hom
      @hm_pos[pos] = ratio
    elsif var_type == :het
      @ht_pos[pos] = ratio
    end
  end

  # calculate var zygosity for non-polyploid variants
  # increased range is used for heterozygosity for RNA-seq data
  #
  # Returns [mode, ratio] where mode is :het (ratio within htlow..hthigh),
  # :hom (ratio above hthigh) or '' (ratio below htlow).
  def var_mode(ratio)
    ht_low = Options.params.htlow
    ht_high = Options.params.hthigh
    mode = ''
    if ratio.between?(ht_low, ht_high)
      mode = :het
    elsif ratio > ht_high
      mode = :hom
    end
    [mode, ratio]
  end

  # Fill parent_hemi with a bfr value for every parental position.
  # Positions present in both parents are removed from bg_parent once
  # processed (existing behaviour, kept as is).
  def hemisnps_in_parent
    # mark all the hemi snp based on both parents
    mut_parent.each_key do |pos|
      mut_parent_frac = @mut_parent[pos].var_base_frac
      if bg_parent.key?(pos)
        bg_parent_frac = @bg_parent[pos].var_base_frac
        @parent_hemi[pos] = Bfr.get_bfr(mut_parent_frac, bg_parent_frac)
        bg_parent.delete(pos)
      else
        @parent_hemi[pos] = Bfr.get_bfr(mut_parent_frac)
      end
    end

    # now include all hemi snp unique to background parent
    bg_parent.each_key do |pos|
      next if @parent_hemi.key?(pos)
      bg_parent_frac = @bg_parent[pos].var_base_frac
      @parent_hemi[pos] = Bfr.get_bfr(bg_parent_frac)
    end
  end

end
|
142
|
+
|
143
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module Cheripic
|
4
|
+
|
5
|
+
class ImplementerError < CheripicError; end
|
6
|
+
|
7
|
+
# Drives one analysis: splits the incoming options, extracts variants and
# writes the selected variants to the output directory.
class Implementer

  require 'ostruct'
  require 'fileutils'
  attr_accessor :options, :variants

  # inputs is the full option hash. File/format related keys are kept on
  # this object as @options; analysis settings are pushed into Options.
  # The output directory is created if needed.
  def initialize(inputs)
    set1 = %i{assembly
              input_format
              mut_bulk
              bg_bulk
              output
              mut_parent
              bg_parent}
    @options = OpenStruct.new(inputs.select { |k| set1.include?(k) })

    set2 = %i{hmes_adjust
              htlow
              hthigh
              mindepth
              min_non_ref_count
              min_indel_count_support
              ignore_reference_n
              mapping_quality
              base_quality
              noise
              cross_type
              only_frag_with_vars
              filter_out_low_hmes
              polyploidy
              bfr_adjust}
    settings = inputs.select { |k| set2.include?(k) }
    Options.update(settings)
    FileUtils.mkdir_p @options.output
  end

  # build the Variants object and compare the pileups
  def extract_vars
    @variants = Variants.new(@options)
    @variants.compare_pileups
  end

  # Print selected variants that could be potential markers or mutation
  # to "<output>/selected_variants.txt". Returns nil.
  def process_variants
    @variants.verify_bg_bulk_pileup
    # block form guarantees the file is closed even if a lookup below raises
    File.open("#{@options.output}/selected_variants.txt", 'w') do |out_file|
      out_file.puts "HME_Score\tAlleleFreq\tseq_id\tposition\tref_base\tcoverage\tbases\tbase_quals\tsequence_left\tAlt_seq\tsequence_right"
      regions = Regions.new(@options.assembly)
      @variants.hmes_frags.each_key do | frag |
        contig_obj = @variants.assembly[frag]
        contig_obj.hm_pos.keys.each do | pos |
          pileup = @variants.pileups[frag].mut_bulk[pos]
          seqs = regions.fetch_seq(frag,pos)
          out_file.puts "#{contig_obj.hme_score}\t#{contig_obj.hm_pos[pos]}\t#{pileup.to_s.chomp}\t#{seqs[0]}\t#{pileup.consensus}\t#{seqs[1]}"
        end
      end
    end
    nil
  end

  # Extract variants if that has not happened yet, write the selected
  # variants and, in polyploidy mode, additionally return the bfr fragments.
  def run
    extract_vars unless defined?(@variants.has_run)
    # both the polyploid and the non-polyploid path write the variants file
    process_variants
    @variants.bfr_frags if Options.params.polyploidy
  end

end
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module Cheripic
|
4
|
+
|
5
|
+
# Process-wide analysis settings, held in a class-level hash.
class Options

  require 'ostruct'

  @defaults = {
    :hmes_adjust => 0.5,
    :htlow => 0.2,
    :hthigh => 0.9,
    :mindepth => 6,
    :min_non_ref_count => 3,
    :min_indel_count_support => 3,
    :ignore_reference_n => true,
    :mapping_quality => 20,
    :base_quality => 15,
    :noise => 0.1,
    :cross_type => 'back',
    :only_frag_with_vars => true,
    :filter_out_low_hmes => true,
    :polyploidy => false,
    :bfr_adjust => 0.05,
    :sel_seq_len => 50
  }

  class << self

    # Merge newset into the stored settings (in place) and return the
    # resulting settings as a fresh OpenStruct.
    def update(newset)
      @defaults.merge!(newset)
      params
    end

    # Current settings wrapped in a new OpenStruct on every call.
    def params
      OpenStruct.new(@defaults)
    end

  end

end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'bio-samtools'
|
4
|
+
require 'bio/db/pileup'
|
5
|
+
|
6
|
+
class Pileup < Bio::DB::Pileup
|
7
|
+
|
8
|
+
attr_accessor :defaults
|
9
|
+
|
10
|
+
def initialize(string, opts={})
|
11
|
+
super(string)
|
12
|
+
set_defaults(opts)
|
13
|
+
adj_read_bases
|
14
|
+
@indelbases = 'acgtryswkmbdhvnACGTRYSWKMBDHVN'
|
15
|
+
end
|
16
|
+
|
17
|
+
# Build the per-pileup settings: built-in defaults overridden by opts.
#
# opts - Hash of overrides using the same keys as below.
#
# Stores the merged hash in @defaults and returns it. Previously the
# result of the merge was discarded, so overrides were never applied.
def set_defaults(opts)
  @defaults = {
    noise: 0.1,                 # noise level for read depth
    ht_low: 0.2,                # min allele freq for heterozygosity
    ht_high: 0.9,               # max allele freq for heterozygosity
    min_depth: 6,               # minimum coverage for variant
    min_non_ref_count: 3,
    ignore_reference_n: true,
    min_indel_count_support: 3,
  }.merge(opts)
end
|
29
|
+
|
30
|
+
# removes mapping quality information
|
31
|
+
def adj_read_bases
|
32
|
+
# mapping quality after '^' symbol is substituted
|
33
|
+
# to avoid splitting at non indel + or - characters
|
34
|
+
# read ends marking by '$' symbol is substituted
|
35
|
+
# insertion and deletion marking by '*' symbol is substituted
|
36
|
+
self.read_bases.gsub!(/\^./, '')
|
37
|
+
self.read_bases.delete! '$'
|
38
|
+
self.read_bases.delete! '*'
|
39
|
+
# warn about reads with ambiguous codes
|
40
|
+
# if self.read_bases.match(/[^atgcATGC,\.\+\-0-9]/)
|
41
|
+
# warn "Ambiguous nucleotide\t#{self.read_bases}"
|
42
|
+
# end
|
43
|
+
end
|
44
|
+
|
45
|
+
# count bases matching reference and non-reference
|
46
|
+
# from snp variant and make a hash of bases with counts
|
47
|
+
# for indels return the read bases information instead
|
48
|
+
def bases_hash
|
49
|
+
if self.read_bases =~ /\+/
|
50
|
+
bases_hash = indels_to_hash('+')
|
51
|
+
elsif self.read_bases =~ /\-/
|
52
|
+
bases_hash = indels_to_hash('-')
|
53
|
+
else
|
54
|
+
bases_hash = snp_base_hash(self.read_bases)
|
55
|
+
end
|
56
|
+
# some indels will have ref base in the read and using
|
57
|
+
# sum of hash values is going to give wrong additional coverage
|
58
|
+
# from indels so including actual coverage from pileup
|
59
|
+
# bases_hash keys are :A, :C, :G, :T, :N, :ref, :indel and :cov
|
60
|
+
bases_hash[:cov] = self.coverage
|
61
|
+
bases_hash
|
62
|
+
end
|
63
|
+
|
64
|
+
# count bases from indels
|
65
|
+
# array of pileup bases is split at + / -
|
66
|
+
# and number after each + / - is counted
|
67
|
+
def count_indel_bases(delimiter)
|
68
|
+
array = self.read_bases.split(delimiter)
|
69
|
+
number = 0
|
70
|
+
array.shift
|
71
|
+
array.each do |element|
|
72
|
+
# deletions in reference could contain ambiguous codes,
|
73
|
+
number += /^(\d+)[#{@indelbases}]/.match(element)[1].to_i
|
74
|
+
end
|
75
|
+
number
|
76
|
+
end
|
77
|
+
|
78
|
+
# count bases matching reference and non-reference
|
79
|
+
# and calculate ratio of non_ref allele to total bases
|
80
|
+
def non_ref_count
|
81
|
+
read_bases = self.read_bases
|
82
|
+
if read_bases =~ /\+/
|
83
|
+
non_ref_count = indel_non_ref_count('+')
|
84
|
+
elsif read_bases =~ /\-/
|
85
|
+
non_ref_count = indel_non_ref_count('-')
|
86
|
+
else
|
87
|
+
non_ref_count = read_bases.count('atgcATGC')
|
88
|
+
end
|
89
|
+
non_ref_count
|
90
|
+
end
|
91
|
+
|
92
|
+
# check if the pileup has the parameters we are looking for
|
93
|
+
def is_var
|
94
|
+
ignore_reference_n = @defaults[:ignore_reference_n]
|
95
|
+
min_depth = @defaults[:min_depth]
|
96
|
+
min_non_ref_count = @defaults[:min_non_ref_count]
|
97
|
+
|
98
|
+
return false if self.ref_base == '*'
|
99
|
+
return false if ignore_reference_n and self.ref_base =~ /^[nN]$/
|
100
|
+
return true if self.coverage >= min_depth and self.non_ref_count >= min_non_ref_count
|
101
|
+
false
|
102
|
+
end
|
103
|
+
|
104
|
+
# count bases matching reference and non-reference
|
105
|
+
# and calculate ratio of non_ref allele to total bases
|
106
|
+
def non_ref_ratio
|
107
|
+
self.non_ref_count.to_f / self.coverage.to_f
|
108
|
+
end
|
109
|
+
|
110
|
+
# calculate var zygosity for non-polyploid variants
|
111
|
+
# increased range is used for heterozygosity for RNA-seq data
|
112
|
+
def var_mode
|
113
|
+
ht_low = @defaults[:ht_low]
|
114
|
+
ht_high = @defaults[:ht_high]
|
115
|
+
mode = ''
|
116
|
+
if self.non_ref_ratio.between?(ht_low, ht_high)
|
117
|
+
mode = :het
|
118
|
+
elsif self.non_ref_ratio > ht_high
|
119
|
+
mode = :hom
|
120
|
+
end
|
121
|
+
mode
|
122
|
+
end
|
123
|
+
|
124
|
+
# form hash of base information, [ATGC] counts for snp
|
125
|
+
# a hash of base proportion is calculated
|
126
|
+
# base proportion hash below a selected depth is empty
|
127
|
+
# base proportion below or equal to a noise factor are discarded
|
128
|
+
# Proportion of each entry of bases_hash relative to the coverage (:cov).
# Returns an empty hash when coverage is below @defaults[:min_depth];
# proportions at or below @defaults[:noise] are left out.
def var_base_frac
  counts = bases_hash
  depth = counts[:cov]
  return {} if depth < @defaults[:min_depth]
  counts.each_with_object({}) do |(base, count), fractions|
    next if base == :cov
    share = count.to_f / depth.to_f
    next if share <= @defaults[:noise]
    fractions[base] = share
  end
end
|
142
|
+
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
# count number of indels and number non-indel base
|
147
|
+
# and return a hash with bases and indel counts
|
148
|
+
def indels_to_hash(delimiter)
|
149
|
+
non_indel_bases = String.new
|
150
|
+
array = self.read_bases.split(delimiter)
|
151
|
+
non_indel_bases << array.shift
|
152
|
+
array.each do |element|
|
153
|
+
# get number of nucleotides inserted or deleted
|
154
|
+
number = /^(\d+)[#{@indelbases}]/.match(element)[1].to_i
|
155
|
+
# capture remaining nucleotides
|
156
|
+
non_indel_bases << element.gsub(/^#{number}\w{#{number}}/, '')
|
157
|
+
end
|
158
|
+
bases_hash = snp_base_hash(non_indel_bases)
|
159
|
+
# check at least three reads are supporting indel
|
160
|
+
indel_count = self.read_bases.count(delimiter)
|
161
|
+
if indel_count >= @defaults[:min_indel_count_support]
|
162
|
+
bases_hash[:indel] = indel_count
|
163
|
+
end
|
164
|
+
bases_hash
|
165
|
+
end
|
166
|
+
|
167
|
+
# Count reference matches ('.' and ',') and A/C/G/T characters
# (either case) in readbases.
# Returns a hash with keys :ref, :A, :C, :G and :T, in that order.
def snp_base_hash(readbases)
  {
    ref: readbases.count('.,'),
    A: readbases.count('aA'),
    C: readbases.count('cC'),
    G: readbases.count('gG'),
    T: readbases.count('tT')
    # N: readbases.count('nN')
  }
end
|
177
|
+
|
178
|
+
def indel_non_ref_count(delimitter)
|
179
|
+
read_bases = self.read_bases
|
180
|
+
non_ref_count = read_bases.count(@indelbases)
|
181
|
+
indelcounts = read_bases.count(delimitter)
|
182
|
+
indel_bases = count_indel_bases(delimitter)
|
183
|
+
non_ref_count + indelcounts - indel_bases
|
184
|
+
end
|
185
|
+
|
186
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio-samtools'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class RegionsError < CheripicError; end
|
8
|
+
|
9
|
+
class Regions
|
10
|
+
|
11
|
+
include Enumerable
|
12
|
+
extend Forwardable
|
13
|
+
def_delegators :@id_len, :each, :each_key, :each_value, :length, :[]
|
14
|
+
attr_accessor :reference_db, :id_len
|
15
|
+
|
16
|
+
def initialize(assembly)
|
17
|
+
@reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>assembly})
|
18
|
+
@id_len = {}
|
19
|
+
self.get_id_len
|
20
|
+
end
|
21
|
+
|
22
|
+
def get_id_len
|
23
|
+
@reference_db.load_fai_entries
|
24
|
+
@reference_db.index.entries.each_entry do | entry |
|
25
|
+
@id_len[entry.id] = entry.length
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# for each id and position returns left and right sequence
|
30
|
+
# of pre-selected length
|
31
|
+
def fetch_seq(id, pos)
|
32
|
+
limit = Options.params.sel_seq_len + 1
|
33
|
+
len = @id_len[id]
|
34
|
+
low = pos-limit <= 0 ? 0 : pos-limit
|
35
|
+
high = pos+limit >= len ? len : pos+limit
|
36
|
+
region = Bio::DB::Fasta::Region.parse_region("#{id}:#{low}-#{pos-1}")
|
37
|
+
seq_left = @reference_db.fetch_sequence(region)
|
38
|
+
region = Bio::DB::Fasta::Region.parse_region("#{id}:#{pos+1}-#{high}")
|
39
|
+
seq_right = @reference_db.fetch_sequence(region)
|
40
|
+
[seq_left, seq_right]
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,201 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bio'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Cheripic
|
6
|
+
|
7
|
+
class VariantsError < CheripicError; end
|
8
|
+
|
9
|
+
class Variants
|
10
|
+
|
11
|
+
include Enumerable
|
12
|
+
extend Forwardable
|
13
|
+
def_delegators :@assembly, :each, :each_key, :each_value, :size, :length, :[]
|
14
|
+
attr_accessor :assembly, :has_run, :pileups, :hmes_frags, :bfr_frags
|
15
|
+
|
16
|
+
def initialize(options)
|
17
|
+
@params = options
|
18
|
+
@assembly = {}
|
19
|
+
@pileups = {}
|
20
|
+
Bio::FastaFormat.open(@params.assembly).each do |entry|
|
21
|
+
if entry.seq.length == 0
|
22
|
+
logger.error "No sequence found for entry #{entry.entry_id}"
|
23
|
+
raise VariantsError
|
24
|
+
end
|
25
|
+
contig = Contig.new(entry)
|
26
|
+
if @assembly.key?(contig.id)
|
27
|
+
logger.error "fasta id already found in the file for #{contig.id}"
|
28
|
+
logger.error 'make sure there are no duplicate entries in the fasta file'
|
29
|
+
raise VariantsError
|
30
|
+
end
|
31
|
+
@assembly[contig.id] = contig
|
32
|
+
@pileups[contig.id] = ContigPileups.new(contig.id)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# Read and store pileup data for each bulk and parents
|
37
|
+
#
|
38
|
+
def analyse_pileups
|
39
|
+
@bg_bulk = @params.bg_bulk
|
40
|
+
@mut_parent = @params.mut_parent
|
41
|
+
@bg_parent = @params.bg_parent
|
42
|
+
|
43
|
+
%i{mut_bulk bg_bulk mut_parent bg_parent}.each do | input |
|
44
|
+
infile = @params[input]
|
45
|
+
if infile != ''
|
46
|
+
extract_pileup(infile, input)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
@has_run = true
|
51
|
+
end
|
52
|
+
|
53
|
+
# Read an mpileup file and store every line that qualifies as a variant.
#
# pileupfile - path of the pileup file
# sym        - name of the ContigPileups accessor to store into
#              (e.g. :mut_bulk)
#
# Raises VariantsError when a pileup line names a sequence for which no
# pileup container exists; previously this surfaced as a NoMethodError
# on nil.
def extract_pileup(pileupfile, sym)
  File.foreach(pileupfile) do |line|
    pileup = Pileup.new(line)
    next unless pileup.is_var
    contig_obj = @pileups[pileup.ref_name]
    if contig_obj.nil?
      logger.error "No fasta entry found for pileup sequence id #{pileup.ref_name}"
      raise VariantsError
    end
    contig_obj.send(sym).store(pileup.pos, pileup)
  end
end
|
63
|
+
|
64
|
+
def compare_pileups
|
65
|
+
unless defined?(@has_run)
|
66
|
+
self.analyse_pileups
|
67
|
+
end
|
68
|
+
@assembly.each_key do | id |
|
69
|
+
contig = @assembly[id]
|
70
|
+
# extract parental hemi snps for polyploids before bulks are compared
|
71
|
+
if @mut_parent != '' or @bg_parent != ''
|
72
|
+
@pileups[id].hemisnps_in_parent
|
73
|
+
end
|
74
|
+
contig.hm_pos, contig.ht_pos, contig.hemi_pos = @pileups[id].bulks_compared
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def hmes_frags
|
79
|
+
# calculate every time method gets called
|
80
|
+
@hmes_frags = select_contigs(:hme_score)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Returns the contigs selected by :bfr_score.
#
# The selection is computed on the first call only and cached in
# @bfr_frags; later calls return the cached value.
def bfr_frags
  @bfr_frags = select_contigs(:bfr_score) unless defined?(@bfr_frags)
  @bfr_frags
end
|
89
|
+
|
90
|
+
# Builds the hash of contigs to be analysed for the given score type.
#
# ratio_type - :hme_score or :bfr_score (anything other than :hme_score is
#              handled as :bfr_score)
#
# When Options.params.only_frag_with_vars is set, a contig is kept only if
#   * :hme_score - hm_num + ht_num exceeds twice Options.params.hmes_adjust
#   * :bfr_score - it has at least one hemi position (hemi_num > 0)
# Otherwise every contig in the assembly is taken. In both cases the result
# is then passed through filter_contigs.
#
# Returns a Hash of contig id => contig.
def select_contigs(ratio_type)
  only_frag_with_vars = Options.params.only_frag_with_vars
  selected_contigs = {}

  @assembly.each_key do |frag|
    contig = @assembly[frag]
    keep =
      if !only_frag_with_vars
        true
      elsif ratio_type == :hme_score
        # selecting fragments which have a variant
        contig.hm_num + contig.ht_num > 2 * Options.params.hmes_adjust
      else
        # in polyploidy scenario selecting fragments with at least one bfr position
        contig.hemi_num > 0
      end
    selected_contigs[frag] = contig if keep
  end

  selected_contigs = filter_contigs(selected_contigs, ratio_type)

  message =
    if only_frag_with_vars
      "Selected #{selected_contigs.length} out of #{@assembly.length} fragments with #{ratio_type} score\n"
    else
      "No filtering was applied to fragments\n"
    end
  logger.info message

  selected_contigs
end
|
118
|
+
|
119
|
+
# Removes contigs whose score is below the cutoff returned by get_cutoff.
#
# selected_contigs - Hash of contig id => contig; modified in place
# ratio_type       - name of the score method to call on each contig
#                    (:hme_score or :bfr_score)
#
# Returns the same Hash object that was passed in, after filtering.
def filter_contigs(selected_contigs, ratio_type)
  cutoff = get_cutoff(selected_contigs, ratio_type)
  # delete_if is the supported way to drop entries while walking a hash;
  # the previous version called delete inside each_key on the same hash
  selected_contigs.delete_if { |_frag, contig| contig.send(ratio_type) < cutoff }
end
|
128
|
+
|
129
|
+
# Works out the minimum score a contig needs to survive filter_contigs.
#
# selected_contigs - Hash of contig id => contig (only used for :bfr_score)
# ratio_type       - :hme_score or :bfr_score (anything other than
#                    :hme_score is handled as :bfr_score)
#
# With Options.params.filter_out_low_hmes switched off the cutoff is 0.0.
# (An earlier comment here said 1.1, which did not match the code.)
# With it switched on:
#   * :hme_score - (1.0 / hmes_adjust) + 1.0 for a 'back' cross,
#                  (2.0 / hmes_adjust) + 1.0 for any other cross type
#   * :bfr_score - the value returned by bfr_cutoff for the given contigs
#
# Returns the cutoff value.
def get_cutoff(selected_contigs, ratio_type)
  return 0.0 unless Options.params.filter_out_low_hmes
  # polyploidy data: cutoff is derived from the bfr scores themselves
  return bfr_cutoff(selected_contigs) unless ratio_type == :hme_score

  adjust = Options.params.hmes_adjust
  # back cross uses 1.0 as numerator, any other cross type (outcross) 2.0
  numerator = Options.params.cross_type == 'back' ? 1.0 : 2.0
  (numerator / adjust) + 1.0
end
|
150
|
+
|
151
|
+
# Picks the bfr_score found at a given rank among the selected contigs.
#
# selected_contigs - Hash of contig id => contig responding to bfr_score
# prop             - share of the contigs to rank down to; it is divided by
#                    100 below, so it is read as a percentage (the default
#                    0.1 means 0.1 % of the contigs)
#
# Returns the score at that rank in the descending list of scores, using at
# least rank 1 (the highest score). Returns nil when there are no contigs.
def bfr_cutoff(selected_contigs, prop=0.1)
  ratios = selected_contigs.each_key.map { |frag| selected_contigs[frag].bfr_score }
  ratios = ratios.sort.reverse

  # explicit Integer rank; the earlier Float index relied on Array#[]
  # truncating it silently
  rank = ((ratios.length * prop) / 100).floor
  # set a minimum rank to get at least one contig
  rank = 1 if rank < 1
  ratios[rank - 1]
end
|
164
|
+
|
165
|
+
# Discards homozygous variant positions that the pileups do not support.
#
# For every contig in @hmes_frags (computed first if not yet defined) each
# position in hm_pos is removed when
#   * the mutant bulk has no pileup at that position, or that pileup is not
#     a variant (not expected to happen), or
#   * the background bulk has a pileup there whose non-reference ratio is
#     higher than 0.35; a recessive variant is expected to have 1/3rd
#     frequency in the background bulk.
# Positions with no background bulk pileup are kept.
#
# Returns the hmes_frags selection recalculated after the clean-up.
def verify_bg_bulk_pileup
  hmes_frags unless defined?(@hmes_frags)

  @hmes_frags.each_key do |frag|
    contig = @assembly[frag]
    contig_pileups = @pileups[frag]

    # iterate over a snapshot of the keys as hm_pos is edited in the loop
    contig.hm_pos.keys.each do |pos|
      discard =
        if !contig_pileups.mut_bulk.key?(pos) || !contig_pileups.mut_bulk[pos].is_var
          # this should not happen, may be catch as an error
          true
        elsif contig_pileups.bg_bulk.key?(pos)
          contig_pileups.bg_bulk[pos].non_ref_ratio > 0.35
        else
          false
        end
      contig.hm_pos.delete(pos) if discard
    end
  end

  # recalculate hmes_frags once pileups are verified
  hmes_frags
end
|
198
|
+
|
199
|
+
end # Variants
|
200
|
+
|
201
|
+
end # Cheripic
|
data/lib/cheripic/version.rb
CHANGED
data/lib/cheripic.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'cheripic/cmd'
|
2
|
-
require 'cheripic/version'
|
3
1
|
|
4
2
|
# set up a global logger object to access across module
|
5
3
|
require 'yell'
|
@@ -23,3 +21,14 @@ module Cheripic
|
|
23
21
|
Object.send :include, Yell::Loggable
|
24
22
|
|
25
23
|
end # Cheripic
|
24
|
+
|
25
|
+
require 'cheripic/cmd'
|
26
|
+
require 'cheripic/version'
|
27
|
+
require 'cheripic/implementer'
|
28
|
+
require 'cheripic/variants'
|
29
|
+
require 'cheripic/contig'
|
30
|
+
require 'cheripic/pileup'
|
31
|
+
require 'cheripic/options'
|
32
|
+
require 'cheripic/contig_pileups'
|
33
|
+
require 'cheripic/bfr'
|
34
|
+
require 'cheripic/regions'
|
metadata
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cheripic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shyam Rallapalli
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: yell
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
- - ">="
|
18
21
|
- !ruby/object:Gem::Version
|
19
22
|
version: 2.0.5
|
20
23
|
type: :runtime
|
@@ -22,6 +25,9 @@ dependencies:
|
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '2.0'
|
30
|
+
- - ">="
|
25
31
|
- !ruby/object:Gem::Version
|
26
32
|
version: 2.0.5
|
27
33
|
- !ruby/object:Gem::Dependency
|
@@ -29,6 +35,9 @@ dependencies:
|
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
37
|
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '2.1'
|
40
|
+
- - ">="
|
32
41
|
- !ruby/object:Gem::Version
|
33
42
|
version: 2.1.2
|
34
43
|
type: :runtime
|
@@ -36,6 +45,9 @@ dependencies:
|
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
38
47
|
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '2.1'
|
50
|
+
- - ">="
|
39
51
|
- !ruby/object:Gem::Version
|
40
52
|
version: 2.1.2
|
41
53
|
- !ruby/object:Gem::Dependency
|
@@ -43,6 +55,9 @@ dependencies:
|
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
44
56
|
requirements:
|
45
57
|
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '1.5'
|
60
|
+
- - ">="
|
46
61
|
- !ruby/object:Gem::Version
|
47
62
|
version: 1.5.0
|
48
63
|
type: :runtime
|
@@ -50,13 +65,33 @@ dependencies:
|
|
50
65
|
version_requirements: !ruby/object:Gem::Requirement
|
51
66
|
requirements:
|
52
67
|
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '1.5'
|
70
|
+
- - ">="
|
53
71
|
- !ruby/object:Gem::Version
|
54
72
|
version: 1.5.0
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: bio-gngm
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 0.2.1
|
80
|
+
type: :runtime
|
81
|
+
prerelease: false
|
82
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - "~>"
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 0.2.1
|
55
87
|
- !ruby/object:Gem::Dependency
|
56
88
|
name: rinruby
|
57
89
|
requirement: !ruby/object:Gem::Requirement
|
58
90
|
requirements:
|
59
91
|
- - "~>"
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '2.0'
|
94
|
+
- - ">="
|
60
95
|
- !ruby/object:Gem::Version
|
61
96
|
version: 2.0.3
|
62
97
|
type: :runtime
|
@@ -64,8 +99,25 @@ dependencies:
|
|
64
99
|
version_requirements: !ruby/object:Gem::Requirement
|
65
100
|
requirements:
|
66
101
|
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '2.0'
|
104
|
+
- - ">="
|
67
105
|
- !ruby/object:Gem::Version
|
68
106
|
version: 2.0.3
|
107
|
+
- !ruby/object:Gem::Dependency
|
108
|
+
name: activesupport
|
109
|
+
requirement: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - "~>"
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: 4.2.6
|
114
|
+
type: :development
|
115
|
+
prerelease: false
|
116
|
+
version_requirements: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - "~>"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: 4.2.6
|
69
121
|
- !ruby/object:Gem::Dependency
|
70
122
|
name: bundler
|
71
123
|
requirement: !ruby/object:Gem::Requirement
|
@@ -126,6 +178,9 @@ dependencies:
|
|
126
178
|
name: simplecov
|
127
179
|
requirement: !ruby/object:Gem::Requirement
|
128
180
|
requirements:
|
181
|
+
- - "~>"
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
version: '0.8'
|
129
184
|
- - ">="
|
130
185
|
- !ruby/object:Gem::Version
|
131
186
|
version: 0.8.2
|
@@ -133,6 +188,9 @@ dependencies:
|
|
133
188
|
prerelease: false
|
134
189
|
version_requirements: !ruby/object:Gem::Requirement
|
135
190
|
requirements:
|
191
|
+
- - "~>"
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0.8'
|
136
194
|
- - ">="
|
137
195
|
- !ruby/object:Gem::Version
|
138
196
|
version: 0.8.2
|
@@ -140,6 +198,9 @@ dependencies:
|
|
140
198
|
name: shoulda
|
141
199
|
requirement: !ruby/object:Gem::Requirement
|
142
200
|
requirements:
|
201
|
+
- - "~>"
|
202
|
+
- !ruby/object:Gem::Version
|
203
|
+
version: '3.5'
|
143
204
|
- - ">="
|
144
205
|
- !ruby/object:Gem::Version
|
145
206
|
version: 3.5.0
|
@@ -147,6 +208,9 @@ dependencies:
|
|
147
208
|
prerelease: false
|
148
209
|
version_requirements: !ruby/object:Gem::Requirement
|
149
210
|
requirements:
|
211
|
+
- - "~>"
|
212
|
+
- !ruby/object:Gem::Version
|
213
|
+
version: '3.5'
|
150
214
|
- - ">="
|
151
215
|
- !ruby/object:Gem::Version
|
152
216
|
version: 3.5.0
|
@@ -154,6 +218,9 @@ dependencies:
|
|
154
218
|
name: coveralls
|
155
219
|
requirement: !ruby/object:Gem::Requirement
|
156
220
|
requirements:
|
221
|
+
- - "~>"
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: '0.7'
|
157
224
|
- - ">="
|
158
225
|
- !ruby/object:Gem::Version
|
159
226
|
version: 0.7.2
|
@@ -161,6 +228,9 @@ dependencies:
|
|
161
228
|
prerelease: false
|
162
229
|
version_requirements: !ruby/object:Gem::Requirement
|
163
230
|
requirements:
|
231
|
+
- - "~>"
|
232
|
+
- !ruby/object:Gem::Version
|
233
|
+
version: '0.7'
|
164
234
|
- - ">="
|
165
235
|
- !ruby/object:Gem::Version
|
166
236
|
version: 0.7.2
|
@@ -183,7 +253,15 @@ files:
|
|
183
253
|
- bin/setup
|
184
254
|
- cheripic.gemspec
|
185
255
|
- lib/cheripic.rb
|
256
|
+
- lib/cheripic/bfr.rb
|
186
257
|
- lib/cheripic/cmd.rb
|
258
|
+
- lib/cheripic/contig.rb
|
259
|
+
- lib/cheripic/contig_pileups.rb
|
260
|
+
- lib/cheripic/implementer.rb
|
261
|
+
- lib/cheripic/options.rb
|
262
|
+
- lib/cheripic/pileup.rb
|
263
|
+
- lib/cheripic/regions.rb
|
264
|
+
- lib/cheripic/variants.rb
|
187
265
|
- lib/cheripic/version.rb
|
188
266
|
homepage: https://github.com/shyamrallapalli/cheripic
|
189
267
|
licenses:
|