cheripic 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +0 -1
- data/bin/cheripic +13 -0
- data/cheripic.gemspec +2 -2
- data/lib/cheripic.rb +7 -1
- data/lib/cheripic/bfr.rb +21 -5
- data/lib/cheripic/cmd.rb +36 -14
- data/lib/cheripic/contig.rb +34 -7
- data/lib/cheripic/contig_pileups.rb +70 -26
- data/lib/cheripic/implementer.rb +24 -3
- data/lib/cheripic/options.rb +110 -10
- data/lib/cheripic/pileup.rb +150 -159
- data/lib/cheripic/regions.rb +20 -4
- data/lib/cheripic/variants.rb +59 -12
- data/lib/cheripic/version.rb +5 -1
- metadata +20 -5
data/lib/cheripic/regions.rb
CHANGED
@@ -4,21 +4,34 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Cheripic
|
6
6
|
|
7
|
+
# Custom error handling for Regions class
|
7
8
|
class RegionsError < CheripicError; end
|
8
9
|
|
10
|
+
# An application of a Bio::DB::Fasta object that allows extracting selected regions of the fasta entries
|
11
|
+
#
|
12
|
+
# @!attribute [r] reference_db
|
13
|
+
# @return [Bio::DB::Fasta::FastaFile] indexed fasta object
|
14
|
+
# @!attribute [r] id_len
|
15
|
+
# @return [Hash] a hash of fasta entry ids as keys and sequence length as values
|
9
16
|
class Regions
|
10
17
|
|
11
18
|
include Enumerable
|
12
19
|
extend Forwardable
|
13
20
|
def_delegators :@id_len, :each, :each_key, :each_value, :length, :[]
|
14
|
-
|
21
|
+
attr_reader :reference_db, :id_len
|
15
22
|
|
23
|
+
# creates a Regions object using fasta
|
24
|
+
#
|
25
|
+
# @param assembly [String] path to a fasta file
|
16
26
|
def initialize(assembly)
|
17
27
|
@reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>assembly})
|
18
28
|
@id_len = {}
|
19
29
|
self.get_id_len
|
20
30
|
end
|
21
31
|
|
32
|
+
# A method to extract fasta entry ids and length and make a hash
|
33
|
+
#
|
34
|
+
# @return [Hash] a hash of fasta entry ids as keys and sequence length as values
|
22
35
|
def get_id_len
|
23
36
|
@reference_db.load_fai_entries
|
24
37
|
@reference_db.index.entries.each_entry do | entry |
|
@@ -26,10 +39,13 @@ module Cheripic
|
|
26
39
|
end
|
27
40
|
end
|
28
41
|
|
29
|
-
#
|
30
|
-
#
|
42
|
+
# A method that returns the left and right sequences of specified length, for a provided id and position
|
43
|
+
# @note this method excludes the base at the position provided
|
44
|
+
# @param id [String]
|
45
|
+
# @param pos [Integer]
|
46
|
+
# @return [Array<String>]
|
31
47
|
def fetch_seq(id, pos)
|
32
|
-
limit = Options.
|
48
|
+
limit = Options.sel_seq_len + 1
|
33
49
|
len = @id_len[id]
|
34
50
|
low = pos-limit <= 0 ? 0 : pos-limit
|
35
51
|
high = pos+limit >= len ? len : pos+limit
|
data/lib/cheripic/variants.rb
CHANGED
@@ -4,15 +4,33 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Cheripic
|
6
6
|
|
7
|
+
# Custom error handling for Variants class
|
7
8
|
class VariantsError < CheripicError; end
|
8
9
|
|
10
|
+
# A Variants object for each analysis pipeline that stores
|
11
|
+
# assembly details and extracts pileups for each contig
|
12
|
+
# assembly and pileup details are stored as
|
13
|
+
# hashes of Contig and ContigPileups objects
|
14
|
+
#
|
15
|
+
# @!attribute [r] assembly
|
16
|
+
# @return [Hash] a hash of contig ids from assembly as keys and respective Contig objects as values
|
17
|
+
# @!attribute [r] pileups
|
18
|
+
# @return [Hash] a hash of contig ids from assembly as keys and respective ContigPileups objects as values
|
19
|
+
# @!attribute [r] hmes_frags
|
20
|
+
# @return [Hash] a hash of contigs with selected hme score, a subset of assembly hash
|
21
|
+
# @!attribute [r] bfr_frags
|
22
|
+
# @return [Hash] a hash of contigs with selected bfr score, a subset of assembly hash
|
23
|
+
# @!attribute [r] pileups_analyzed
|
24
|
+
# @return [Boolean] a Boolean option to check if pileups for the assembly are analyzed or not
|
9
25
|
class Variants
|
10
26
|
|
11
27
|
include Enumerable
|
12
28
|
extend Forwardable
|
13
29
|
def_delegators :@assembly, :each, :each_key, :each_value, :size, :length, :[]
|
14
|
-
|
30
|
+
attr_reader :assembly, :pileups, :hmes_frags, :bfr_frags, :pileups_analyzed
|
15
31
|
|
32
|
+
# creates a Variants object using user input files
|
33
|
+
# @param options [Hash] a hash of required input files as keys and file paths as values
|
16
34
|
def initialize(options)
|
17
35
|
@params = options
|
18
36
|
@assembly = {}
|
@@ -31,10 +49,11 @@ module Cheripic
|
|
31
49
|
@assembly[contig.id] = contig
|
32
50
|
@pileups[contig.id] = ContigPileups.new(contig.id)
|
33
51
|
end
|
52
|
+
@pileups_analyzed = false
|
34
53
|
end
|
35
54
|
|
36
|
-
#
|
37
|
-
#
|
55
|
+
# Reads and stores pileup data for each of the input bulk and parent pileup files
|
56
|
+
# and sets pileups_analyzed to true to indicate that the pileup files have been processed
|
38
57
|
def analyse_pileups
|
39
58
|
@bg_bulk = @params.bg_bulk
|
40
59
|
@mut_parent = @params.mut_parent
|
@@ -47,9 +66,13 @@ module Cheripic
|
|
47
66
|
end
|
48
67
|
end
|
49
68
|
|
50
|
-
@
|
69
|
+
@pileups_analyzed = true
|
51
70
|
end
|
52
71
|
|
72
|
+
# Input pileup file is read and positions are selected that pass the thresholds
|
73
|
+
# @param pileupfile [String] path to the pileup file to read
|
74
|
+
# @param sym [Symbol] Symbol of the pileup file used to write selected variants
|
75
|
+
# pileup information to respective ContigPileups object
|
53
76
|
def extract_pileup(pileupfile, sym)
|
54
77
|
# read mpileup file and process each variant
|
55
78
|
File.foreach(pileupfile) do |line|
|
@@ -61,8 +84,12 @@ module Cheripic
|
|
61
84
|
end
|
62
85
|
end
|
63
86
|
|
87
|
+
# Once pileup files are analysed and variants are extracted from each bulk;
|
88
|
+
# bulks are compared to identify and isolate variants for downstream analysis.
|
89
|
+
# If polyploidy is set to true and mut_parent and bg_parent bulks are provided
|
90
|
+
# hemisnps in parents are extracted for bulk frequency ratio analysis
|
64
91
|
def compare_pileups
|
65
|
-
unless
|
92
|
+
unless @pileups_analyzed
|
66
93
|
self.analyse_pileups
|
67
94
|
end
|
68
95
|
@assembly.each_key do | id |
|
@@ -75,11 +102,15 @@ module Cheripic
|
|
75
102
|
end
|
76
103
|
end
|
77
104
|
|
105
|
+
# From Assembly contig objects, contigs are selected based on user selected options
|
106
|
+
# for homozygosity enrichment score (hme_score)
|
78
107
|
def hmes_frags
|
79
108
|
# calculate every time method gets called
|
80
109
|
@hmes_frags = select_contigs(:hme_score)
|
81
110
|
end
|
82
111
|
|
112
|
+
# From Assembly contig objects, contigs are selected based on user selected options
|
113
|
+
# for bulk frequency ratio (bfr_score)
|
83
114
|
def bfr_frags
|
84
115
|
unless defined?(@bfr_frags)
|
85
116
|
@bfr_frags = select_contigs(:bfr_score)
|
@@ -87,14 +118,19 @@ module Cheripic
|
|
87
118
|
@bfr_frags
|
88
119
|
end
|
89
120
|
|
121
|
+
# Applies selection procedure on assembly contigs based on the ratio_type provided.
|
122
|
+
# If only_frag_with_vars is set to true then contigs without any variant are discarded for :hme_score
|
123
|
+
# while contigs without any hemisnps are discarded for :bfr_score
|
124
|
+
# If filter_out_low_hmes is set to true then contigs are further filtered based on a cut off value of the score
|
125
|
+
# @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
|
90
126
|
def select_contigs(ratio_type)
|
91
127
|
selected_contigs ={}
|
92
|
-
only_frag_with_vars = Options.
|
128
|
+
only_frag_with_vars = Options.only_frag_with_vars
|
93
129
|
@assembly.each_key do | frag |
|
94
130
|
if only_frag_with_vars
|
95
131
|
if ratio_type == :hme_score
|
96
132
|
# selecting fragments which have a variant
|
97
|
-
if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.
|
133
|
+
if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.hmes_adjust
|
98
134
|
selected_contigs[frag] = @assembly[frag]
|
99
135
|
end
|
100
136
|
else # ratio_type == :bfr_score
|
@@ -116,6 +152,10 @@ module Cheripic
|
|
116
152
|
selected_contigs
|
117
153
|
end
|
118
154
|
|
155
|
+
# Filters out contigs below a cutoff for selected ratio_type
|
156
|
+
# a cutoff value is calculated based on ratio_type provided
|
157
|
+
# @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
|
158
|
+
# @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash
|
119
159
|
def filter_contigs(selected_contigs, ratio_type)
|
120
160
|
cutoff = get_cutoff(selected_contigs, ratio_type)
|
121
161
|
selected_contigs.each_key do | frag |
|
@@ -126,15 +166,19 @@ module Cheripic
|
|
126
166
|
selected_contigs
|
127
167
|
end
|
128
168
|
|
169
|
+
# Cut off value calculation used to filter out low scored contigs.
|
170
|
+
#
|
171
|
+
# @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
|
172
|
+
# @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash
|
129
173
|
def get_cutoff(selected_contigs, ratio_type)
|
130
|
-
filter_out_low_hmes = Options.
|
174
|
+
filter_out_low_hmes = Options.filter_out_low_hmes
|
131
175
|
# set minimum cut off hme_score or bfr_score to pick fragments with variants
|
132
176
|
# calculate min hme score for back or out crossed data or bfr_score for polyploidy data
|
133
177
|
# if no filtering applied set cutoff to 1.1
|
134
178
|
if filter_out_low_hmes
|
135
179
|
if ratio_type == :hme_score
|
136
|
-
adjust = Options.
|
137
|
-
if Options.
|
180
|
+
adjust = Options.hmes_adjust
|
181
|
+
if Options.cross_type == 'back'
|
138
182
|
cutoff = (1.0/adjust) + 1.0
|
139
183
|
else # outcross
|
140
184
|
cutoff = (2.0/adjust) + 1.0
|
@@ -148,6 +192,9 @@ module Cheripic
|
|
148
192
|
cutoff
|
149
193
|
end
|
150
194
|
|
195
|
+
# Cut off value calculation for bfr contigs.
|
196
|
+
# ratio value at index 0.1% length of an array or at index zero of an array that contains decreasing order of bfr ratios
|
197
|
+
# @param selected_contigs [Hash] a hash of contigs with selected bfr score, a subset of assembly hash
|
151
198
|
def bfr_cutoff(selected_contigs, prop=0.1)
|
152
199
|
ratios = []
|
153
200
|
selected_contigs.each_key do | frag |
|
@@ -162,8 +209,8 @@ module Cheripic
|
|
162
209
|
ratios[index - 1]
|
163
210
|
end
|
164
211
|
|
165
|
-
#
|
166
|
-
# pileup shows
|
212
|
+
# Method is to discard homozygous variant positions for which background bulk
|
213
|
+
# pileup shows a fraction value higher than 0.35 for variant allele/non-reference allele
|
167
214
|
# a recessive variant is expected to have 1/3rd frequency in background bulk
|
168
215
|
def verify_bg_bulk_pileup
|
169
216
|
unless defined?(@hmes_frags)
|
data/lib/cheripic/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cheripic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shyam Rallapalli
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: yell
|
@@ -70,6 +70,20 @@ dependencies:
|
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
72
|
version: 1.5.0
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: bio-samtools
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 2.4.0
|
80
|
+
type: :runtime
|
81
|
+
prerelease: false
|
82
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - "~>"
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: 2.4.0
|
73
87
|
- !ruby/object:Gem::Dependency
|
74
88
|
name: bio-gngm
|
75
89
|
requirement: !ruby/object:Gem::Requirement
|
@@ -124,14 +138,14 @@ dependencies:
|
|
124
138
|
requirements:
|
125
139
|
- - "~>"
|
126
140
|
- !ruby/object:Gem::Version
|
127
|
-
version:
|
141
|
+
version: 1.7.6
|
128
142
|
type: :development
|
129
143
|
prerelease: false
|
130
144
|
version_requirements: !ruby/object:Gem::Requirement
|
131
145
|
requirements:
|
132
146
|
- - "~>"
|
133
147
|
- !ruby/object:Gem::Version
|
134
|
-
version:
|
148
|
+
version: 1.7.6
|
135
149
|
- !ruby/object:Gem::Dependency
|
136
150
|
name: rake
|
137
151
|
requirement: !ruby/object:Gem::Requirement
|
@@ -249,6 +263,7 @@ files:
|
|
249
263
|
- LICENSE.txt
|
250
264
|
- README.md
|
251
265
|
- Rakefile
|
266
|
+
- bin/cheripic
|
252
267
|
- bin/console
|
253
268
|
- bin/setup
|
254
269
|
- cheripic.gemspec
|
@@ -283,7 +298,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
283
298
|
version: '0'
|
284
299
|
requirements: []
|
285
300
|
rubyforge_project:
|
286
|
-
rubygems_version: 2.4.
|
301
|
+
rubygems_version: 2.4.3
|
287
302
|
signing_key:
|
288
303
|
specification_version: 4
|
289
304
|
summary: picks causative mutation from bulks segregant sequencing
|