cheripic 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +0 -1
- data/bin/cheripic +13 -0
- data/cheripic.gemspec +2 -2
- data/lib/cheripic.rb +7 -1
- data/lib/cheripic/bfr.rb +21 -5
- data/lib/cheripic/cmd.rb +36 -14
- data/lib/cheripic/contig.rb +34 -7
- data/lib/cheripic/contig_pileups.rb +70 -26
- data/lib/cheripic/implementer.rb +24 -3
- data/lib/cheripic/options.rb +110 -10
- data/lib/cheripic/pileup.rb +150 -159
- data/lib/cheripic/regions.rb +20 -4
- data/lib/cheripic/variants.rb +59 -12
- data/lib/cheripic/version.rb +5 -1
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 458f681424a73ea58acb8aefa73d68019ad0854d
|
4
|
+
data.tar.gz: 23547939b1fead465d06d2f6d8e45ce4172b1cb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e3af0df95197769c542b4aab76444a6b14842890b46a97d6be10101f267db5f5df7d1ed8d67083ac8890a866e1cab678a9b23c5dc03b1edb7b8fc2150b35097
|
7
|
+
data.tar.gz: 9aa159df9086102679bd6359d4a5bf94dfe72f52d9c11e66259ffd40754f767001bb2a67e996b958018a009db0a6aa558c7ebe4001c2f6bce9b8993bdfd66091
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/bin/cheripic
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'cheripic'
|
3
|
+
|
4
|
+
# rescue errors to get clean error messages through the logger
|
5
|
+
# backtrace can be accessed by setting --loglevel to 'debug' option
|
6
|
+
begin
|
7
|
+
submission = Cheripic::Cmd.new ARGV
|
8
|
+
submission.run
|
9
|
+
rescue Cheripic::CheripicError => e
|
10
|
+
logger.error e.message
|
11
|
+
logger.debug e.backtrace unless e.backtrace.nil?
|
12
|
+
exit 1
|
13
|
+
end
|
data/cheripic.gemspec
CHANGED
@@ -22,12 +22,12 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_runtime_dependency 'yell', '~> 2.0', '>= 2.0.5'
|
23
23
|
spec.add_runtime_dependency 'trollop', '~> 2.1', '>= 2.1.2'
|
24
24
|
spec.add_runtime_dependency 'bio', '~> 1.5', '>= 1.5.0'
|
25
|
-
|
25
|
+
spec.add_dependency 'bio-samtools', '~> 2.4.0'
|
26
26
|
spec.add_dependency 'bio-gngm', '~> 0.2.1'
|
27
27
|
spec.add_runtime_dependency 'rinruby', '~> 2.0', '>= 2.0.3'
|
28
28
|
|
29
29
|
spec.add_development_dependency 'activesupport', '~> 4.2.6'
|
30
|
-
spec.add_development_dependency 'bundler', '~> 1.
|
30
|
+
spec.add_development_dependency 'bundler', '~> 1.7.6'
|
31
31
|
spec.add_development_dependency 'rake', '~> 10.0'
|
32
32
|
spec.add_development_dependency 'minitest'
|
33
33
|
spec.add_development_dependency 'minitest-reporters', '>= 1.0.17'
|
data/lib/cheripic.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
|
2
|
-
#
|
2
|
+
# sets up a global logger object to access across module
|
3
3
|
require 'yell'
|
4
|
+
|
5
|
+
# Computing Homozygosity Enriched Regions In genomes to Prioritize Identification of Candidate variants (CHERIPIC)
|
6
|
+
# Cheripic module provides tools and pipeline to extract potential candidate mutations
|
7
|
+
# in and around the region of the genome hosting the causative mutation behind the phenotype of interest.
|
4
8
|
module Cheripic
|
5
9
|
|
6
10
|
# custom error handling
|
7
11
|
class CheripicError < StandardError; end
|
12
|
+
# custom error handling for IO
|
8
13
|
class CheripicIOError < CheripicError; end
|
14
|
+
# custom error handling for Arg
|
9
15
|
class CheripicArgError < CheripicError; end
|
10
16
|
|
11
17
|
# Define a logger and pass `Object` as name.
|
data/lib/cheripic/bfr.rb
CHANGED
@@ -2,16 +2,26 @@
|
|
2
2
|
|
3
3
|
module Cheripic
|
4
4
|
|
5
|
+
# Custom error handling for Bfr class
|
5
6
|
class BfrError < CheripicError; end
|
6
7
|
|
8
|
+
# A class to calculate bulk frequency ratio (bfr) using one or two hashes of base fractions resulted from pileup
|
9
|
+
#
|
10
|
+
# @!attribute [rw] bfr_adj
|
11
|
+
# @return [Float] a float value to adjust the bfr calculation
|
7
12
|
class Bfr
|
8
13
|
|
9
14
|
attr_accessor :bfr_adj
|
10
15
|
|
11
|
-
# get bulk frequency ratio (bfr) for
|
12
|
-
#
|
16
|
+
# A method to get bulk frequency ratio (bfr) for selected hemi snps.
|
17
|
+
# This is done by selecting which hash (mutant or background) to use for bfr calculation
|
18
|
+
# either calculates fraction or bfr
|
19
|
+
# and ignores positions with complex variants.
|
20
|
+
# @param mut_hash [Hash] a hash of base fractions from pileup of mutant bulk
|
21
|
+
# @param bg_hash [Hash] a hash of base fractions from pileup of background bulk
|
22
|
+
# @return [Float] a ratio calculated
|
13
23
|
def self.get_bfr(mut_hash, bg_hash='')
|
14
|
-
@bfr_adj = Options.
|
24
|
+
@bfr_adj = Options.bfr_adjust
|
15
25
|
if bg_hash != ''
|
16
26
|
# checking if only two vars in base hash and that includes ref
|
17
27
|
# checking if only one var in hemi snp
|
@@ -37,9 +47,12 @@ module Cheripic
|
|
37
47
|
bfr
|
38
48
|
end
|
39
49
|
|
40
|
-
# calculate bfr using
|
50
|
+
# A method to calculate bfr using a base fraction hash with hemi-snp
|
51
|
+
# @param two_key_hash [Hash] a hash of base fractions from pileup with 2 keys (a ref and variant base)
|
52
|
+
# @param other_hash [Hash] a hash of base fractions from pileup
|
53
|
+
# @return [Float] a ratio calculated
|
41
54
|
def self.calculate_bfr(two_key_hash, other_hash)
|
42
|
-
#
|
55
|
+
# if :ref is absent such as below noise depth, then set to zero
|
43
56
|
unless two_key_hash.key?(:ref)
|
44
57
|
two_key_hash[:ref] = 0
|
45
58
|
end
|
@@ -63,6 +76,9 @@ module Cheripic
|
|
63
76
|
bfr
|
64
77
|
end
|
65
78
|
|
79
|
+
# A method to calculate ratio using a base fraction hash
|
80
|
+
# @param hash [Hash] a hash of base fractions from pileup with 2 or 1 keys
|
81
|
+
# @return [Array<Float><String>] an array of ratio calculated and base character
|
66
82
|
def self.calc_fraction(hash)
|
67
83
|
unless hash.key?(:ref)
|
68
84
|
hash[:ref] = 0
|
data/lib/cheripic/cmd.rb
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
|
3
3
|
module Cheripic
|
4
4
|
|
5
|
+
# A command line option and processing object to handle input options
|
6
|
+
#
|
7
|
+
# @!attribute [rw] options
|
8
|
+
# @return [Hash] a hash of trollop option names as keys and user or default setting as values
|
5
9
|
class Cmd
|
6
10
|
|
7
11
|
require 'trollop'
|
@@ -10,11 +14,16 @@ module Cheripic
|
|
10
14
|
|
11
15
|
attr_accessor :options
|
12
16
|
|
17
|
+
# creates a Cmd object using input string entry
|
18
|
+
# @param args [String]
|
13
19
|
def initialize(args)
|
14
20
|
@options = parse_arguments(args)
|
15
21
|
check_arguments
|
16
22
|
end
|
17
23
|
|
24
|
+
# method to check input command string and run appropriate
|
25
|
+
# method of the object (help or examples or parsing arguments)
|
26
|
+
# @param args [String]
|
18
27
|
def parse_arguments(args)
|
19
28
|
Trollop::with_standard_exception_handling argument_parser do
|
20
29
|
if args.empty? || args.include?('-h') || args.include?('--help')
|
@@ -26,6 +35,8 @@ module Cheripic
|
|
26
35
|
end
|
27
36
|
end
|
28
37
|
|
38
|
+
# trollop argument_parser for input args string
|
39
|
+
# @return [Hash] a hash of trollop option names as keys and user or default setting as values
|
29
40
|
def argument_parser
|
30
41
|
cmds = self
|
31
42
|
Trollop::Parser.new do
|
@@ -106,40 +117,44 @@ module Cheripic
|
|
106
117
|
end
|
107
118
|
end
|
108
119
|
|
120
|
+
# help message to display from command line
|
109
121
|
def help_message
|
110
|
-
<<-EOS
|
122
|
+
msg = <<-EOS
|
111
123
|
|
112
|
-
Cheripic v#{Cheripic::VERSION.dup}
|
113
|
-
Authors: Shyam Rallapalli and Dan MacLean
|
124
|
+
Cheripic v#{Cheripic::VERSION.dup}
|
125
|
+
Authors: Shyam Rallapalli and Dan MacLean
|
114
126
|
|
115
|
-
Description: Candidate mutation and closely linked marker selection for non reference genomes
|
116
|
-
Uses bulk segregant data from non-reference sequence genomes
|
127
|
+
Description: Candidate mutation and closely linked marker selection for non reference genomes
|
128
|
+
Uses bulk segregant data from non-reference sequence genomes
|
117
129
|
|
118
|
-
Inputs:
|
119
|
-
1. Needs a reference fasta file of asssembly use for variant analysis
|
120
|
-
2. Pileup files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
|
121
|
-
3. If polyploid species, include of pileup from one or both parents
|
130
|
+
Inputs:
|
131
|
+
1. Needs a reference fasta file of asssembly use for variant analysis
|
132
|
+
2. Pileup files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
|
133
|
+
3. If polyploid species, include of pileup from one or both parents
|
122
134
|
|
123
|
-
USAGE:
|
124
|
-
cheripic <options>
|
135
|
+
USAGE:
|
136
|
+
cheripic <options>
|
125
137
|
|
126
|
-
OPTIONS:
|
138
|
+
OPTIONS:
|
127
139
|
|
128
140
|
EOS
|
141
|
+
msg.split("\n").map{ |line| line.lstrip }.join("\n")
|
129
142
|
end
|
130
143
|
|
144
|
+
# examples to display from command line
|
131
145
|
def print_examples
|
132
146
|
msg = <<-EOS
|
133
147
|
|
134
|
-
|
148
|
+
Cheripic v#{Cheripic::VERSION.dup}
|
135
149
|
|
136
|
-
|
150
|
+
EXAMPLE COMMANDS:
|
137
151
|
|
138
152
|
EOS
|
139
153
|
puts msg.split("\n").map{ |line| line.lstrip }.join("\n")
|
140
154
|
exit(0)
|
141
155
|
end
|
142
156
|
|
157
|
+
# calls other methods to check if command line inputs are valid
|
143
158
|
def check_arguments
|
144
159
|
check_output_dir
|
145
160
|
check_log_level
|
@@ -153,6 +168,7 @@ OPTIONS:
|
|
153
168
|
# end
|
154
169
|
# end
|
155
170
|
|
171
|
+
# checks if input files are valid
|
156
172
|
def check_input_files
|
157
173
|
if @options[:polyploidy]
|
158
174
|
inputfiles = %i{assembly mut_bulk bg_bulk mut_parent bg_parent}
|
@@ -173,6 +189,7 @@ OPTIONS:
|
|
173
189
|
end
|
174
190
|
end
|
175
191
|
|
192
|
+
# checks if output directory already exists
|
176
193
|
def check_output_dir
|
177
194
|
if Dir.exist?(@options[:output])
|
178
195
|
raise CheripicArgError.new "#{@options[:output]} directory exists" +
|
@@ -180,6 +197,7 @@ OPTIONS:
|
|
180
197
|
end
|
181
198
|
end
|
182
199
|
|
200
|
+
# checks and sets logger level
|
183
201
|
def check_log_level
|
184
202
|
unless %w(error info warn debug).include?(@options[:loglevel])
|
185
203
|
raise CheripicArgError.new "Loglevel #{@options[:loglevel]} is not valid. " +
|
@@ -188,6 +206,10 @@ OPTIONS:
|
|
188
206
|
logger.level = Yell::Level.new @options[:loglevel].to_sym
|
189
207
|
end
|
190
208
|
|
209
|
+
# Initializes an Implementer object using input options
|
210
|
+
# and calls run method of the Implementer to start the pipeline
|
211
|
+
# A hash of trollop option names as keys and user or default
|
212
|
+
# setting as values is passed to Implementer object
|
191
213
|
def run
|
192
214
|
@options[:output] = File.expand_path @options[:output]
|
193
215
|
analysis = Implementer.new(@options)
|
data/lib/cheripic/contig.rb
CHANGED
@@ -4,16 +4,29 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Cheripic
|
6
6
|
|
7
|
+
# Custom error handling for Contig class
|
7
8
|
class ContigError < CheripicError; end
|
8
9
|
|
10
|
+
# A contig object from assembly that stores positions of
|
11
|
+
# homozygous, heterozygous and hemi-variants
|
12
|
+
#
|
13
|
+
# @!attribute [rw] hm_pos
|
14
|
+
# @return [Hash] a hash of homozygous variant positions as keys and allele frequency as values
|
15
|
+
# @!attribute [rw] ht_pos
|
16
|
+
# @return [Hash] a hash of heterozygous variant positions as keys and allele frequency as values
|
17
|
+
# @!attribute [rw] hemi_pos
|
18
|
+
# @return [Hash] a hash of hemi-variant positions as keys and allele frequency as values
|
19
|
+
# @!attribute [r] id
|
20
|
+
# @return [String] id of the contig in assembly taken from fasta file
|
21
|
+
# @!attribute [r] length
|
22
|
+
# @return [Integer] length of contig in bases
|
9
23
|
class Contig
|
10
24
|
|
11
|
-
|
12
|
-
|
13
|
-
# delegate [:size, :length] => :@contig
|
14
|
-
# def_delegator :@contig, :entry_id, :id
|
15
|
-
attr_accessor :hm_pos, :ht_pos, :hemi_pos, :id, :length
|
25
|
+
attr_accessor :hm_pos, :ht_pos, :hemi_pos
|
26
|
+
attr_reader :id, :length
|
16
27
|
|
28
|
+
# creates a Contig object using fasta entry
|
29
|
+
# @param fasta [Bio::FastaFormat] an individual fasta entry from input assembly file
|
17
30
|
def initialize (fasta)
|
18
31
|
@id = fasta.entry_id
|
19
32
|
@length = fasta.length
|
@@ -22,16 +35,23 @@ module Cheripic
|
|
22
35
|
@hemi_pos = {}
|
23
36
|
end
|
24
37
|
|
38
|
+
# Number of homozygous variants identified in the contig
|
39
|
+
# @return [Integer]
|
25
40
|
def hm_num
|
26
41
|
self.hm_pos.length
|
27
42
|
end
|
28
43
|
|
44
|
+
# Number of heterozygous variants identified in the contig
|
45
|
+
# @return [Integer]
|
29
46
|
def ht_num
|
30
47
|
self.ht_pos.length
|
31
48
|
end
|
32
49
|
|
50
|
+
# Homozygosity enrichment score calculated using
|
51
|
+
# hm_num and ht_num of the contig object
|
52
|
+
# @return [Float]
|
33
53
|
def hme_score
|
34
|
-
hmes_adjust = Options.
|
54
|
+
hmes_adjust = Options.hmes_adjust
|
35
55
|
if self.hm_num == 0 and self.ht_num == 0
|
36
56
|
0.0
|
37
57
|
else
|
@@ -39,10 +59,15 @@ module Cheripic
|
|
39
59
|
end
|
40
60
|
end
|
41
61
|
|
62
|
+
# Number of hemi-variants identified in the contig
|
63
|
+
# @return [Integer]
|
42
64
|
def hemi_num
|
43
65
|
self.hemi_pos.length
|
44
66
|
end
|
45
67
|
|
68
|
+
# Mean of bulk frequency ratios (bfr) calculated using
|
69
|
+
# bfr values of all hemi_pos of the contig
|
70
|
+
# @return [Float]
|
46
71
|
def bfr_score
|
47
72
|
if self.hemi_pos.values.empty?
|
48
73
|
0.0
|
@@ -51,7 +76,9 @@ module Cheripic
|
|
51
76
|
end
|
52
77
|
end
|
53
78
|
|
54
|
-
#
|
79
|
+
# Calculates mean of an array of numbers
|
80
|
+
# @param array [Array] an array of bfr values from hemi_snp
|
81
|
+
# @return [Float] mean value as float
|
55
82
|
def geom_mean(array)
|
56
83
|
return array[0].to_f if array.length == 1
|
57
84
|
array.reduce(:+) / array.size.to_f
|
@@ -4,8 +4,25 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Cheripic
|
6
6
|
|
7
|
+
# Custom error handling for ContigPileup class
|
7
8
|
class ContigPileupsError < CheripicError; end
|
8
9
|
|
10
|
+
# A ContigPileup object for each contig from assembly that stores
|
11
|
+
# pileup file information and variants are selected from analysis of pileup files
|
12
|
+
# selected variants from pileup files are stored as hashes
|
13
|
+
#
|
14
|
+
# @!attribute [rw] id
|
15
|
+
# @return [String] id of the contig in assembly taken from fasta file
|
16
|
+
# @!attribute [rw] mut_bulk
|
17
|
+
# @return [Hash] a hash of variant positions from mut_bulk as keys and pileup info as values
|
18
|
+
# @!attribute [rw] bg_bulk
|
19
|
+
# @return [Hash] a hash of variant positions from bg_bulk as keys and pileup info as values
|
20
|
+
# @!attribute [rw] mut_parent
|
21
|
+
# @return [Hash] a hash of variant positions from mut_parent as keys and pileup info as values
|
22
|
+
# @!attribute [rw] bg_parent
|
23
|
+
# @return [Hash] a hash of variant positions from bg_parent as keys and pileup info as values
|
24
|
+
# @!attribute [rw] parent_hemi
|
25
|
+
# @return [Hash] a hash of hemi-variant positions as keys and bfr calculated from parent bulks as values
|
9
26
|
class ContigPileups
|
10
27
|
|
11
28
|
include Enumerable
|
@@ -17,6 +34,8 @@ module Cheripic
|
|
17
34
|
attr_accessor :id, :parent_hemi
|
18
35
|
attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent
|
19
36
|
|
37
|
+
# creates a ContigPileup object using fasta entry id
|
38
|
+
# @param fasta [String] a contig id from fasta entry
|
20
39
|
def initialize (fasta)
|
21
40
|
@id = fasta
|
22
41
|
@mut_bulk = {}
|
@@ -26,12 +45,15 @@ module Cheripic
|
|
26
45
|
@parent_hemi = {}
|
27
46
|
end
|
28
47
|
|
48
|
+
# bulk pileups are compared and variant positions are selected
|
49
|
+
# @return [Array<Hash>] variant positions are stored in hashes
|
50
|
+
# for homozygous, heterozygous and hemi-variant positions
|
29
51
|
def bulks_compared
|
30
52
|
@hm_pos = {}
|
31
53
|
@ht_pos = {}
|
32
54
|
@hemi_pos = {}
|
33
55
|
@mut_bulk.each_key do | pos |
|
34
|
-
if Options.
|
56
|
+
if Options.polyploidy and @parent_hemi.key?(pos)
|
35
57
|
bg_bases = ''
|
36
58
|
if @bg_bulk.key?(pos)
|
37
59
|
bg_bases = @bg_bulk[pos].var_base_frac
|
@@ -46,9 +68,11 @@ module Cheripic
|
|
46
68
|
[@hm_pos, @ht_pos, @hemi_pos]
|
47
69
|
end
|
48
70
|
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
71
|
+
# mut_bulk and bg_bulk pileups are compared at selected position of the contig.
|
72
|
+
# Empty hash results from position below selected coverage
|
73
|
+
# or bases freq below noise and such positions are deleted.
|
74
|
+
# @param pos [Integer] position in the contig
|
75
|
+
# stores variant type, position and allele fraction to either @hm_pos or @ht_pos hashes
|
52
76
|
def compare_pileup(pos)
|
53
77
|
base_hash = @mut_bulk[pos].var_base_frac
|
54
78
|
base_hash.delete(:ref)
|
@@ -56,22 +80,43 @@ module Cheripic
|
|
56
80
|
# we could ignore complex loci or
|
57
81
|
# take the variant type based on predominant base
|
58
82
|
if base_hash.length > 1
|
59
|
-
|
83
|
+
fraction = base_hash.values.max
|
84
|
+
mut_type = var_mode(fraction)
|
60
85
|
else
|
61
|
-
|
62
|
-
mut_type
|
86
|
+
fraction = base_hash[base_hash.keys[0]]
|
87
|
+
mut_type = var_mode(fraction)
|
63
88
|
end
|
64
89
|
if @bg_bulk.key?(pos)
|
65
90
|
bg_type = bg_bulk_var(pos)
|
66
91
|
mut_type = compare_var_type(mut_type, bg_type)
|
67
92
|
end
|
68
93
|
unless mut_type == nil
|
69
|
-
categorise_pos(mut_type, pos,
|
94
|
+
categorise_pos(mut_type, pos, fraction)
|
70
95
|
end
|
71
96
|
end
|
72
97
|
|
73
|
-
#
|
74
|
-
#
|
98
|
+
# Categorizes variant zygosity based on the allele fraction provided.
|
99
|
+
# Uses lower and upper limit set for heterozygosity in the options.
|
100
|
+
# @note consider increasing the range of heterozygosity limits for RNA-seq data
|
101
|
+
# @param fraction [Float] allele fraction
|
102
|
+
# @return [Symbol] of either :het or :hom to represent heterozygous or homozygous respectively
|
103
|
+
def var_mode(fraction)
|
104
|
+
ht_low = Options.htlow
|
105
|
+
ht_high = Options.hthigh
|
106
|
+
mode = ''
|
107
|
+
if fraction.between?(ht_low, ht_high)
|
108
|
+
mode = :het
|
109
|
+
elsif fraction > ht_high
|
110
|
+
mode = :hom
|
111
|
+
end
|
112
|
+
mode
|
113
|
+
end
|
114
|
+
|
115
|
+
# Simple comparison of variant type of mut and bg bulks at a position
|
116
|
+
# If both bulks have homozygous variant at selected position then it is ignored
|
117
|
+
# @param muttype [Symbol] values are either :hom or :het
|
118
|
+
# @param bgtype [Symbol] values are either :hom or :het
|
119
|
+
# @return [Symbol] variant mode of the mut bulk (:hom or :het) at the position or nil
|
75
120
|
def compare_var_type(muttype, bgtype)
|
76
121
|
if muttype == :hom and bgtype == :hom
|
77
122
|
nil
|
@@ -80,17 +125,26 @@ module Cheripic
|
|
80
125
|
end
|
81
126
|
end
|
82
127
|
|
128
|
+
# Method to extract var_mode from pileup information at a position in contig
|
129
|
+
#
|
130
|
+
# @param pos [Integer] position in the contig
|
131
|
+
# @return [Symbol] variant mode of the background bulk (:hom or :het) at the position
|
83
132
|
def bg_bulk_var(pos)
|
84
133
|
bg_base_hash = @bg_bulk[pos].var_base_frac
|
85
134
|
if bg_base_hash.length > 1
|
86
135
|
# taking only var mode
|
87
|
-
var_mode(bg_base_hash.values.max)
|
136
|
+
var_mode(bg_base_hash.values.max)
|
88
137
|
else
|
89
138
|
# taking only var mode
|
90
|
-
var_mode(bg_base_hash[0])
|
139
|
+
var_mode(bg_base_hash[0])
|
91
140
|
end
|
92
141
|
end
|
93
142
|
|
143
|
+
# method stores pos as key and allele fraction as value
|
144
|
+
# to @hm_pos or @ht_pos hash based on variant type
|
145
|
+
# @param var_type [Symbol] values are either :hom or :het
|
146
|
+
# @param pos [Integer] position in the contig
|
147
|
+
# @param ratio [Float] allele fraction
|
94
148
|
def categorise_pos(var_type, pos, ratio)
|
95
149
|
if var_type == :hom
|
96
150
|
@hm_pos[pos] = ratio
|
@@ -99,20 +153,10 @@ module Cheripic
|
|
99
153
|
end
|
100
154
|
end
|
101
155
|
|
102
|
-
#
|
103
|
-
#
|
104
|
-
|
105
|
-
|
106
|
-
ht_high = Options.params.hthigh
|
107
|
-
mode = ''
|
108
|
-
if ratio.between?(ht_low, ht_high)
|
109
|
-
mode = :het
|
110
|
-
elsif ratio > ht_high
|
111
|
-
mode = :hom
|
112
|
-
end
|
113
|
-
[mode, ratio]
|
114
|
-
end
|
115
|
-
|
156
|
+
# Compares parental pileups for the contig and identifies positions
|
157
|
+
# that indicate variants from homeologues called hemi-snps
|
158
|
+
# and calculates bulk frequency ratio (bfr)
|
159
|
+
# @return [Hash] parent_hemi hash with position as key and bfr as value
|
116
160
|
def hemisnps_in_parent
|
117
161
|
# mark all the hemi snp based on both parents
|
118
162
|
self.mut_parent.each_key do |pos|
|