cheripic 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +0 -1
- data/bin/cheripic +13 -0
- data/cheripic.gemspec +2 -2
- data/lib/cheripic.rb +7 -1
- data/lib/cheripic/bfr.rb +21 -5
- data/lib/cheripic/cmd.rb +36 -14
- data/lib/cheripic/contig.rb +34 -7
- data/lib/cheripic/contig_pileups.rb +70 -26
- data/lib/cheripic/implementer.rb +24 -3
- data/lib/cheripic/options.rb +110 -10
- data/lib/cheripic/pileup.rb +150 -159
- data/lib/cheripic/regions.rb +20 -4
- data/lib/cheripic/variants.rb +59 -12
- data/lib/cheripic/version.rb +5 -1
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 458f681424a73ea58acb8aefa73d68019ad0854d
|
4
|
+
data.tar.gz: 23547939b1fead465d06d2f6d8e45ce4172b1cb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e3af0df95197769c542b4aab76444a6b14842890b46a97d6be10101f267db5f5df7d1ed8d67083ac8890a866e1cab678a9b23c5dc03b1edb7b8fc2150b35097
|
7
|
+
data.tar.gz: 9aa159df9086102679bd6359d4a5bf94dfe72f52d9c11e66259ffd40754f767001bb2a67e996b958018a009db0a6aa558c7ebe4001c2f6bce9b8993bdfd66091
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/bin/cheripic
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'cheripic'
|
3
|
+
|
4
|
+
# rescue errors to get clean error messages through the logger
|
5
|
+
# backtrace can be accessed by setting --loglevel to 'debug' option
|
6
|
+
begin
|
7
|
+
submission = Cheripic::Cmd.new ARGV
|
8
|
+
submission.run
|
9
|
+
rescue Cheripic::CheripicError => e
|
10
|
+
logger.error e.message
|
11
|
+
logger.debug e.backtrace unless e.backtrace.nil?
|
12
|
+
exit 1
|
13
|
+
end
|
data/cheripic.gemspec
CHANGED
@@ -22,12 +22,12 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_runtime_dependency 'yell', '~> 2.0', '>= 2.0.5'
|
23
23
|
spec.add_runtime_dependency 'trollop', '~> 2.1', '>= 2.1.2'
|
24
24
|
spec.add_runtime_dependency 'bio', '~> 1.5', '>= 1.5.0'
|
25
|
-
|
25
|
+
spec.add_dependency 'bio-samtools', '~> 2.4.0'
|
26
26
|
spec.add_dependency 'bio-gngm', '~> 0.2.1'
|
27
27
|
spec.add_runtime_dependency 'rinruby', '~> 2.0', '>= 2.0.3'
|
28
28
|
|
29
29
|
spec.add_development_dependency 'activesupport', '~> 4.2.6'
|
30
|
-
spec.add_development_dependency 'bundler', '~> 1.
|
30
|
+
spec.add_development_dependency 'bundler', '~> 1.7.6'
|
31
31
|
spec.add_development_dependency 'rake', '~> 10.0'
|
32
32
|
spec.add_development_dependency 'minitest'
|
33
33
|
spec.add_development_dependency 'minitest-reporters', '>= 1.0.17'
|
data/lib/cheripic.rb
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
|
2
|
-
#
|
2
|
+
# sets up a global logger object to access across module
|
3
3
|
require 'yell'
|
4
|
+
|
5
|
+
# Computing Homozygosity Enriched Regions In genomes to Prioritize Identification of Candidate variants (CHERIPIC)
|
6
|
+
# Cheripic module provides tools and pipeline to extract potential candidate mutations
|
7
|
+
# in and around the region of the genome hosting the causative mutation behind the phenotype of interest.
|
4
8
|
module Cheripic
|
5
9
|
|
6
10
|
# custom error handling
|
7
11
|
class CheripicError < StandardError; end
|
12
|
+
# custom error handling for IO
|
8
13
|
class CheripicIOError < CheripicError; end
|
14
|
+
# custom error handling for Arg
|
9
15
|
class CheripicArgError < CheripicError; end
|
10
16
|
|
11
17
|
# Define a logger and pass `Object` as name.
|
data/lib/cheripic/bfr.rb
CHANGED
@@ -2,16 +2,26 @@
|
|
2
2
|
|
3
3
|
module Cheripic
|
4
4
|
|
5
|
+
# Custom error handling for Bfr class
|
5
6
|
class BfrError < CheripicError; end
|
6
7
|
|
8
|
+
# A class to calculate bulk frequency ratio (bfr) using one or two hashes of base fractions resulted from pileup
|
9
|
+
#
|
10
|
+
# @!attribute [rw] bfr_adj
|
11
|
+
# @return [Float] a float value to adjust the bfr calculation
|
7
12
|
class Bfr
|
8
13
|
|
9
14
|
attr_accessor :bfr_adj
|
10
15
|
|
11
|
-
# get bulk frequency ratio (bfr) for
|
12
|
-
#
|
16
|
+
# A method to get bulk frequency ratio (bfr) for selected hemi snps.
|
17
|
+
# This is done by selecting which hash (mutant or background) to use for bfr calculation
|
18
|
+
# either calculates fraction or bfr
|
19
|
+
# and ignores positions with complex variants.
|
20
|
+
# @param mut_hash [Hash] a hash of base fractions from pileup of mutant bulk
|
21
|
+
# @param bg_hash [Hash] a hash of base fractions from pileup of background bulk
|
22
|
+
# @return [Float] a ratio calculated
|
13
23
|
def self.get_bfr(mut_hash, bg_hash='')
|
14
|
-
@bfr_adj = Options.
|
24
|
+
@bfr_adj = Options.bfr_adjust
|
15
25
|
if bg_hash != ''
|
16
26
|
# checking if only two vars in base hash and that includes ref
|
17
27
|
# checking if only one var in hemi snp
|
@@ -37,9 +47,12 @@ module Cheripic
|
|
37
47
|
bfr
|
38
48
|
end
|
39
49
|
|
40
|
-
# calculate bfr using
|
50
|
+
# A method to calculate bfr using a base fraction hash with hemi-snp
|
51
|
+
# @param two_key_hash [Hash] a hash of base fractions from pileup with 2 keys (a ref and variant base)
|
52
|
+
# @param other_hash [Hash] a hash of base fractions from pileup
|
53
|
+
# @return [Float] a ratio calculated
|
41
54
|
def self.calculate_bfr(two_key_hash, other_hash)
|
42
|
-
#
|
55
|
+
# if :ref is absent such as below noise depth, then set to zero
|
43
56
|
unless two_key_hash.key?(:ref)
|
44
57
|
two_key_hash[:ref] = 0
|
45
58
|
end
|
@@ -63,6 +76,9 @@ module Cheripic
|
|
63
76
|
bfr
|
64
77
|
end
|
65
78
|
|
79
|
+
# A method to calculate ratio using a base fraction hash
|
80
|
+
# @param hash [Hash] a hash of base fractions from pileup with 2 or 1 keys
|
81
|
+
# @return [Array(Float, String)] an array of ratio calculated and base character
|
66
82
|
def self.calc_fraction(hash)
|
67
83
|
unless hash.key?(:ref)
|
68
84
|
hash[:ref] = 0
|
data/lib/cheripic/cmd.rb
CHANGED
@@ -2,6 +2,10 @@
|
|
2
2
|
|
3
3
|
module Cheripic
|
4
4
|
|
5
|
+
# A command line option and processing object to handle input options
|
6
|
+
#
|
7
|
+
# @!attribute [rw] options
|
8
|
+
# @return [Hash] a hash of trollop option names as keys and user or default setting as values
|
5
9
|
class Cmd
|
6
10
|
|
7
11
|
require 'trollop'
|
@@ -10,11 +14,16 @@ module Cheripic
|
|
10
14
|
|
11
15
|
attr_accessor :options
|
12
16
|
|
17
|
+
# creates a Cmd object using input string entry
|
18
|
+
# @param args [Array<String>] command line arguments (e.g. ARGV)
|
13
19
|
def initialize(args)
|
14
20
|
@options = parse_arguments(args)
|
15
21
|
check_arguments
|
16
22
|
end
|
17
23
|
|
24
|
+
# method to check input command string and run appropriate
|
25
|
+
# method of the object (help or examples or parsing arguments)
|
26
|
+
# @param args [Array<String>] command line arguments (e.g. ARGV)
|
18
27
|
def parse_arguments(args)
|
19
28
|
Trollop::with_standard_exception_handling argument_parser do
|
20
29
|
if args.empty? || args.include?('-h') || args.include?('--help')
|
@@ -26,6 +35,8 @@ module Cheripic
|
|
26
35
|
end
|
27
36
|
end
|
28
37
|
|
38
|
+
# trollop argument_parser for input args string
|
39
|
+
# @return [Hash] a hash of trollop option names as keys and user or default setting as values
|
29
40
|
def argument_parser
|
30
41
|
cmds = self
|
31
42
|
Trollop::Parser.new do
|
@@ -106,40 +117,44 @@ module Cheripic
|
|
106
117
|
end
|
107
118
|
end
|
108
119
|
|
120
|
+
# help message to display from command line
|
109
121
|
def help_message
|
110
|
-
<<-EOS
|
122
|
+
msg = <<-EOS
|
111
123
|
|
112
|
-
Cheripic v#{Cheripic::VERSION.dup}
|
113
|
-
Authors: Shyam Rallapalli and Dan MacLean
|
124
|
+
Cheripic v#{Cheripic::VERSION.dup}
|
125
|
+
Authors: Shyam Rallapalli and Dan MacLean
|
114
126
|
|
115
|
-
Description: Candidate mutation and closely linked marker selection for non reference genomes
|
116
|
-
Uses bulk segregant data from non-reference sequence genomes
|
127
|
+
Description: Candidate mutation and closely linked marker selection for non reference genomes
|
128
|
+
Uses bulk segregant data from non-reference sequence genomes
|
117
129
|
|
118
|
-
Inputs:
|
119
|
-
1. Needs a reference fasta file of asssembly use for variant analysis
|
120
|
-
2. Pileup files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
|
121
|
-
3. If polyploid species, include of pileup from one or both parents
|
130
|
+
Inputs:
|
131
|
+
1. Needs a reference fasta file of asssembly use for variant analysis
|
132
|
+
2. Pileup files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
|
133
|
+
3. If polyploid species, include of pileup from one or both parents
|
122
134
|
|
123
|
-
USAGE:
|
124
|
-
cheripic <options>
|
135
|
+
USAGE:
|
136
|
+
cheripic <options>
|
125
137
|
|
126
|
-
OPTIONS:
|
138
|
+
OPTIONS:
|
127
139
|
|
128
140
|
EOS
|
141
|
+
msg.split("\n").map{ |line| line.lstrip }.join("\n")
|
129
142
|
end
|
130
143
|
|
144
|
+
# examples to display from command line
|
131
145
|
def print_examples
|
132
146
|
msg = <<-EOS
|
133
147
|
|
134
|
-
|
148
|
+
Cheripic v#{Cheripic::VERSION.dup}
|
135
149
|
|
136
|
-
|
150
|
+
EXAMPLE COMMANDS:
|
137
151
|
|
138
152
|
EOS
|
139
153
|
puts msg.split("\n").map{ |line| line.lstrip }.join("\n")
|
140
154
|
exit(0)
|
141
155
|
end
|
142
156
|
|
157
|
+
# calls other methods to check if command line inputs are valid
|
143
158
|
def check_arguments
|
144
159
|
check_output_dir
|
145
160
|
check_log_level
|
@@ -153,6 +168,7 @@ OPTIONS:
|
|
153
168
|
# end
|
154
169
|
# end
|
155
170
|
|
171
|
+
# checks if input files are valid
|
156
172
|
def check_input_files
|
157
173
|
if @options[:polyploidy]
|
158
174
|
inputfiles = %i{assembly mut_bulk bg_bulk mut_parent bg_parent}
|
@@ -173,6 +189,7 @@ OPTIONS:
|
|
173
189
|
end
|
174
190
|
end
|
175
191
|
|
192
|
+
# checks if output directory already exists
|
176
193
|
def check_output_dir
|
177
194
|
if Dir.exist?(@options[:output])
|
178
195
|
raise CheripicArgError.new "#{@options[:output]} directory exists" +
|
@@ -180,6 +197,7 @@ OPTIONS:
|
|
180
197
|
end
|
181
198
|
end
|
182
199
|
|
200
|
+
# checks and sets logger level
|
183
201
|
def check_log_level
|
184
202
|
unless %w(error info warn debug).include?(@options[:loglevel])
|
185
203
|
raise CheripicArgError.new "Loglevel #{@options[:loglevel]} is not valid. " +
|
@@ -188,6 +206,10 @@ OPTIONS:
|
|
188
206
|
logger.level = Yell::Level.new @options[:loglevel].to_sym
|
189
207
|
end
|
190
208
|
|
209
|
+
# Initializes an Implementer object using input options
|
210
|
+
# and calls run method of the Implementer to start the pipeline
|
211
|
+
# A hash of trollop option names as keys and user or default
|
212
|
+
# setting as values is passed to Implementer object
|
191
213
|
def run
|
192
214
|
@options[:output] = File.expand_path @options[:output]
|
193
215
|
analysis = Implementer.new(@options)
|
data/lib/cheripic/contig.rb
CHANGED
@@ -4,16 +4,29 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Cheripic
|
6
6
|
|
7
|
+
# Custom error handling for Contig class
|
7
8
|
class ContigError < CheripicError; end
|
8
9
|
|
10
|
+
# A contig object from assembly that stores positions of
|
11
|
+
# homozygous, heterozygous and hemi-variants
|
12
|
+
#
|
13
|
+
# @!attribute [rw] hm_pos
|
14
|
+
# @return [Hash] a hash of homozygous variant positions as keys and allele frequency as values
|
15
|
+
# @!attribute [rw] ht_pos
|
16
|
+
# @return [Hash] a hash of heterozygous variant positions as keys and allele frequency as values
|
17
|
+
# @!attribute [rw] hemi_pos
|
18
|
+
# @return [Hash] a hash of hemi-variant positions as keys and allele frequency as values
|
19
|
+
# @!attribute [r] id
|
20
|
+
# @return [String] id of the contig in assembly taken from fasta file
|
21
|
+
# @!attribute [r] length
|
22
|
+
# @return [Integer] length of contig in bases
|
9
23
|
class Contig
|
10
24
|
|
11
|
-
|
12
|
-
|
13
|
-
# delegate [:size, :length] => :@contig
|
14
|
-
# def_delegator :@contig, :entry_id, :id
|
15
|
-
attr_accessor :hm_pos, :ht_pos, :hemi_pos, :id, :length
|
25
|
+
attr_accessor :hm_pos, :ht_pos, :hemi_pos
|
26
|
+
attr_reader :id, :length
|
16
27
|
|
28
|
+
# creates a Contig object using fasta entry
|
29
|
+
# @param fasta [Bio::FastaFormat] an individual fasta entry from input assembly file
|
17
30
|
def initialize (fasta)
|
18
31
|
@id = fasta.entry_id
|
19
32
|
@length = fasta.length
|
@@ -22,16 +35,23 @@ module Cheripic
|
|
22
35
|
@hemi_pos = {}
|
23
36
|
end
|
24
37
|
|
38
|
+
# Number of homozygous variants identified in the contig
|
39
|
+
# @return [Integer]
|
25
40
|
def hm_num
|
26
41
|
self.hm_pos.length
|
27
42
|
end
|
28
43
|
|
44
|
+
# Number of heterozygous variants identified in the contig
|
45
|
+
# @return [Integer]
|
29
46
|
def ht_num
|
30
47
|
self.ht_pos.length
|
31
48
|
end
|
32
49
|
|
50
|
+
# Homozygosity enrichment score calculated using
|
51
|
+
# hm_num and ht_num of the contig object
|
52
|
+
# @return [Float]
|
33
53
|
def hme_score
|
34
|
-
hmes_adjust = Options.
|
54
|
+
hmes_adjust = Options.hmes_adjust
|
35
55
|
if self.hm_num == 0 and self.ht_num == 0
|
36
56
|
0.0
|
37
57
|
else
|
@@ -39,10 +59,15 @@ module Cheripic
|
|
39
59
|
end
|
40
60
|
end
|
41
61
|
|
62
|
+
# Number of hemi-variants identified in the contig
|
63
|
+
# @return [Integer]
|
42
64
|
def hemi_num
|
43
65
|
self.hemi_pos.length
|
44
66
|
end
|
45
67
|
|
68
|
+
# Mean of bulk frequency ratios (bfr) calculated using
|
69
|
+
# bfr values of all hemi_pos of the contig
|
70
|
+
# @return [Float]
|
46
71
|
def bfr_score
|
47
72
|
if self.hemi_pos.values.empty?
|
48
73
|
0.0
|
@@ -51,7 +76,9 @@ module Cheripic
|
|
51
76
|
end
|
52
77
|
end
|
53
78
|
|
54
|
-
#
|
79
|
+
# Calculates mean of an array of numbers
|
80
|
+
# @param array [Array] an array of bfr values from hemi_snp
|
81
|
+
# @return [Float] mean value as float
|
55
82
|
def geom_mean(array)
|
56
83
|
return array[0].to_f if array.length == 1
|
57
84
|
array.reduce(:+) / array.size.to_f
|
@@ -4,8 +4,25 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Cheripic
|
6
6
|
|
7
|
+
# Custom error handling for ContigPileup class
|
7
8
|
class ContigPileupsError < CheripicError; end
|
8
9
|
|
10
|
+
# A ContigPileup object for each contig from assembly that stores
|
11
|
+
# pileup file information and variants are selected from analysis of pileup files
|
12
|
+
# selected variants from pileup files are stored as hashes
|
13
|
+
#
|
14
|
+
# @!attribute [rw] id
|
15
|
+
# @return [String] id of the contig in assembly taken from fasta file
|
16
|
+
# @!attribute [rw] mut_bulk
|
17
|
+
# @return [Hash] a hash of variant positions from mut_bulk as keys and pileup info as values
|
18
|
+
# @!attribute [rw] bg_bulk
|
19
|
+
# @return [Hash] a hash of variant positions from bg_bulk as keys and pileup info as values
|
20
|
+
# @!attribute [rw] mut_parent
|
21
|
+
# @return [Hash] a hash of variant positions from mut_parent as keys and pileup info as values
|
22
|
+
# @!attribute [rw] bg_parent
|
23
|
+
# @return [Hash] a hash of variant positions from bg_parent as keys and pileup info as values
|
24
|
+
# @!attribute [rw] parent_hemi
|
25
|
+
# @return [Hash] a hash of hemi-variant positions as keys and bfr calculated from parent bulks as values
|
9
26
|
class ContigPileups
|
10
27
|
|
11
28
|
include Enumerable
|
@@ -17,6 +34,8 @@ module Cheripic
|
|
17
34
|
attr_accessor :id, :parent_hemi
|
18
35
|
attr_accessor :mut_bulk, :bg_bulk, :mut_parent, :bg_parent
|
19
36
|
|
37
|
+
# creates a ContigPileup object using fasta entry id
|
38
|
+
# @param fasta [String] a contig id from fasta entry
|
20
39
|
def initialize (fasta)
|
21
40
|
@id = fasta
|
22
41
|
@mut_bulk = {}
|
@@ -26,12 +45,15 @@ module Cheripic
|
|
26
45
|
@parent_hemi = {}
|
27
46
|
end
|
28
47
|
|
48
|
+
# bulk pileups are compared and variant positions are selected
|
49
|
+
# @return [Array<Hash>] variant positions are stored in hashes
|
50
|
+
# for homozygous, heterozygous and hemi-variant positions
|
29
51
|
def bulks_compared
|
30
52
|
@hm_pos = {}
|
31
53
|
@ht_pos = {}
|
32
54
|
@hemi_pos = {}
|
33
55
|
@mut_bulk.each_key do | pos |
|
34
|
-
if Options.
|
56
|
+
if Options.polyploidy and @parent_hemi.key?(pos)
|
35
57
|
bg_bases = ''
|
36
58
|
if @bg_bulk.key?(pos)
|
37
59
|
bg_bases = @bg_bulk[pos].var_base_frac
|
@@ -46,9 +68,11 @@ module Cheripic
|
|
46
68
|
[@hm_pos, @ht_pos, @hemi_pos]
|
47
69
|
end
|
48
70
|
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
71
|
+
# mut_bulk and bg_bulk pileups are compared at selected position of the contig.
|
72
|
+
# Empty hash results from position below selected coverage
|
73
|
+
# or bases freq below noise and such positions are deleted.
|
74
|
+
# @param pos [Integer] position in the contig
|
75
|
+
# stores variant type, position and allele fraction to either @hm_pos or @ht_pos hashes
|
52
76
|
def compare_pileup(pos)
|
53
77
|
base_hash = @mut_bulk[pos].var_base_frac
|
54
78
|
base_hash.delete(:ref)
|
@@ -56,22 +80,43 @@ module Cheripic
|
|
56
80
|
# we could ignore complex loci or
|
57
81
|
# take the variant type based on predominant base
|
58
82
|
if base_hash.length > 1
|
59
|
-
|
83
|
+
fraction = base_hash.values.max
|
84
|
+
mut_type = var_mode(fraction)
|
60
85
|
else
|
61
|
-
|
62
|
-
mut_type
|
86
|
+
fraction = base_hash[base_hash.keys[0]]
|
87
|
+
mut_type = var_mode(fraction)
|
63
88
|
end
|
64
89
|
if @bg_bulk.key?(pos)
|
65
90
|
bg_type = bg_bulk_var(pos)
|
66
91
|
mut_type = compare_var_type(mut_type, bg_type)
|
67
92
|
end
|
68
93
|
unless mut_type == nil
|
69
|
-
categorise_pos(mut_type, pos,
|
94
|
+
categorise_pos(mut_type, pos, fraction)
|
70
95
|
end
|
71
96
|
end
|
72
97
|
|
73
|
-
#
|
74
|
-
#
|
98
|
+
# Categorizes variant zygosity based on the allele fraction provided.
|
99
|
+
# Uses lower and upper limit set for heterozygosity in the options.
|
100
|
+
# @note consider increasing the range of heterozygosity limits for RNA-seq data
|
101
|
+
# @param fraction [Float] allele fraction
|
102
|
+
# @return [Symbol] of either :het or :hom to represent heterozygous or homozygous respectively
|
103
|
+
def var_mode(fraction)
|
104
|
+
ht_low = Options.htlow
|
105
|
+
ht_high = Options.hthigh
|
106
|
+
mode = ''
|
107
|
+
if fraction.between?(ht_low, ht_high)
|
108
|
+
mode = :het
|
109
|
+
elsif fraction > ht_high
|
110
|
+
mode = :hom
|
111
|
+
end
|
112
|
+
mode
|
113
|
+
end
|
114
|
+
|
115
|
+
# Simple comparison of variant type of mut and bg bulks at a position
|
116
|
+
# If both bulks have homozygous variant at selected position then it is ignored
|
117
|
+
# @param muttype [Symbol] values are either :hom or :het
|
118
|
+
# @param bgtype [Symbol] values are either :hom or :het
|
119
|
+
# @return [Symbol] variant mode of the mut bulk (:hom or :het) at the position or nil
|
75
120
|
def compare_var_type(muttype, bgtype)
|
76
121
|
if muttype == :hom and bgtype == :hom
|
77
122
|
nil
|
@@ -80,17 +125,26 @@ module Cheripic
|
|
80
125
|
end
|
81
126
|
end
|
82
127
|
|
128
|
+
# Method to extract var_mode from pileup information at a position in contig
|
129
|
+
#
|
130
|
+
# @param pos [Integer] position in the contig
|
131
|
+
# @return [Symbol] variant mode of the background bulk (:hom or :het) at the position
|
83
132
|
def bg_bulk_var(pos)
|
84
133
|
bg_base_hash = @bg_bulk[pos].var_base_frac
|
85
134
|
if bg_base_hash.length > 1
|
86
135
|
# taking only var mode
|
87
|
-
var_mode(bg_base_hash.values.max)
|
136
|
+
var_mode(bg_base_hash.values.max)
|
88
137
|
else
|
89
138
|
# taking only var mode
|
90
|
-
var_mode(bg_base_hash[0])
|
139
|
+
var_mode(bg_base_hash[0])
|
91
140
|
end
|
92
141
|
end
|
93
142
|
|
143
|
+
# method stores pos as key and allele fraction as value
|
144
|
+
# to @hm_pos or @ht_pos hash based on variant type
|
145
|
+
# @param var_type [Symbol] values are either :hom or :het
|
146
|
+
# @param pos [Integer] position in the contig
|
147
|
+
# @param ratio [Float] allele fraction
|
94
148
|
def categorise_pos(var_type, pos, ratio)
|
95
149
|
if var_type == :hom
|
96
150
|
@hm_pos[pos] = ratio
|
@@ -99,20 +153,10 @@ module Cheripic
|
|
99
153
|
end
|
100
154
|
end
|
101
155
|
|
102
|
-
#
|
103
|
-
#
|
104
|
-
|
105
|
-
|
106
|
-
ht_high = Options.params.hthigh
|
107
|
-
mode = ''
|
108
|
-
if ratio.between?(ht_low, ht_high)
|
109
|
-
mode = :het
|
110
|
-
elsif ratio > ht_high
|
111
|
-
mode = :hom
|
112
|
-
end
|
113
|
-
[mode, ratio]
|
114
|
-
end
|
115
|
-
|
156
|
+
# Compares parental pileups for the contig and identify position
|
157
|
+
# that indicate variants from homoeologues called hemi-snps
|
158
|
+
# and calculates bulk frequency ratio (bfr)
|
159
|
+
# @return [Hash] parent_hemi hash with position as key and bfr as value
|
116
160
|
def hemisnps_in_parent
|
117
161
|
# mark all the hemi snp based on both parents
|
118
162
|
self.mut_parent.each_key do |pos|
|