cheripic 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,21 +4,34 @@ require 'forwardable'
4
4
 
5
5
  module Cheripic
6
6
 
7
+ # Custom error handling for Regions class
7
8
  class RegionsError < CheripicError; end
8
9
 
10
+ # An application of a Bio::DB::Fasta object that extracts selected regions of the fasta entries
11
+ #
12
+ # @!attribute [r] reference_db
13
+ # @return [Bio::DB::Fasta::FastaFile] indexed fasta object
14
+ # @!attribute [r] id_len
15
+ # @return [Hash] a hash of fasta entry ids as keys and sequence length as values
9
16
  class Regions
10
17
 
11
18
  include Enumerable
12
19
  extend Forwardable
13
20
  def_delegators :@id_len, :each, :each_key, :each_value, :length, :[]
14
- attr_accessor :reference_db, :id_len
21
+ attr_reader :reference_db, :id_len
15
22
 
23
+ # creates a Regions object using fasta
24
+ #
25
+ # @param assembly [String] path to a fasta file
16
26
  def initialize(assembly)
17
27
  @reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>assembly})
18
28
  @id_len = {}
19
29
  self.get_id_len
20
30
  end
21
31
 
32
+ # A method to extract fasta entry ids and length and make a hash
33
+ #
34
+ # @return [Hash] a hash of fasta entry ids as keys and sequence length as values
22
35
  def get_id_len
23
36
  @reference_db.load_fai_entries
24
37
  @reference_db.index.entries.each_entry do | entry |
@@ -26,10 +39,13 @@ module Cheripic
26
39
  end
27
40
  end
28
41
 
29
- # for each id and position returns left and right sequence
30
- # of pre-selected length
42
+ # A method that returns the left and right sequences of specified length, for a provided id and position
43
+ # @note this method excludes the base at the position provided
44
+ # @param id [String]
45
+ # @param pos [Integer]
46
+ # @return [Array<String>]
31
47
  def fetch_seq(id, pos)
32
- limit = Options.params.sel_seq_len + 1
48
+ limit = Options.sel_seq_len + 1
33
49
  len = @id_len[id]
34
50
  low = pos-limit <= 0 ? 0 : pos-limit
35
51
  high = pos+limit >= len ? len : pos+limit
@@ -4,15 +4,33 @@ require 'forwardable'
4
4
 
5
5
  module Cheripic
6
6
 
7
+ # Custom error handling for Variants class
7
8
  class VariantsError < CheripicError; end
8
9
 
10
+ # A Variants object for each analysis pipeline that stores
11
+ # assembly details and extracts pileups for each contig
12
+ # assembly and pileup details are stored as
13
+ # hashes of Contig and ContigPileups objects
14
+ #
15
+ # @!attribute [r] assembly
16
+ # @return [Hash] a hash of contig ids from assembly as keys and respective Contig objects as values
17
+ # @!attribute [r] pileups
18
+ # @return [Hash] a hash of contig ids from assembly as keys and respective ContigPileups objects as values
19
+ # @!attribute [r] hmes_frags
20
+ # @return [Hash] a hash of contigs with selected hme score, a subset of assembly hash
21
+ # @!attribute [r] bfr_frags
22
+ # @return [Hash] a hash of contigs with selected bfr score, a subset of assembly hash
23
+ # @!attribute [r] pileups_analyzed
24
+ # @return [Boolean] a Boolean option to check if pileups for the assembly are analyzed or not
9
25
  class Variants
10
26
 
11
27
  include Enumerable
12
28
  extend Forwardable
13
29
  def_delegators :@assembly, :each, :each_key, :each_value, :size, :length, :[]
14
- attr_accessor :assembly, :has_run, :pileups, :hmes_frags, :bfr_frags
30
+ attr_reader :assembly, :pileups, :hmes_frags, :bfr_frags, :pileups_analyzed
15
31
 
32
+ # creates a Variants object using user input files
33
+ # @param options [Hash] a hash of required input files as keys and file paths as values
16
34
  def initialize(options)
17
35
  @params = options
18
36
  @assembly = {}
@@ -31,10 +49,11 @@ module Cheripic
31
49
  @assembly[contig.id] = contig
32
50
  @pileups[contig.id] = ContigPileups.new(contig.id)
33
51
  end
52
+ @pileups_analyzed = false
34
53
  end
35
54
 
36
- # Read and store pileup data for each bulk and parents
37
- #
55
+ # Reads and stores pileup data for each of the input bulk and parent pileup files
56
+ # and sets pileups_analyzed to true once the pileup files are processed
38
57
  def analyse_pileups
39
58
  @bg_bulk = @params.bg_bulk
40
59
  @mut_parent = @params.mut_parent
@@ -47,9 +66,13 @@ module Cheripic
47
66
  end
48
67
  end
49
68
 
50
- @has_run = true
69
+ @pileups_analyzed = true
51
70
  end
52
71
 
72
+ # Input pileup file is read and positions are selected that pass the thresholds
73
+ # @param pileupfile [String] path to the pileup file to read
74
+ # @param sym [Symbol] Symbol of the pileup file used to write selected variants
75
+ # pileup information to respective ContigPileups object
53
76
  def extract_pileup(pileupfile, sym)
54
77
  # read mpileup file and process each variant
55
78
  File.foreach(pileupfile) do |line|
@@ -61,8 +84,12 @@ module Cheripic
61
84
  end
62
85
  end
63
86
 
87
+ # Once pileup files are analysed and variants are extracted from each bulk;
88
+ # bulks are compared to identify and isolate variants for downstream analysis.
89
+ # If polyploidy is set to true and mut_parent and bg_parent bulks are provided,
90
+ # hemisnps in parents are extracted for bulk frequency ratio analysis
64
91
  def compare_pileups
65
- unless defined?(@has_run)
92
+ unless @pileups_analyzed
66
93
  self.analyse_pileups
67
94
  end
68
95
  @assembly.each_key do | id |
@@ -75,11 +102,15 @@ module Cheripic
75
102
  end
76
103
  end
77
104
 
105
+ # From Assembly contig objects, contigs are selected based on user selected options
106
+ # for homozygosity enrichment score (hme_score)
78
107
  def hmes_frags
79
108
  # calculate every time method gets called
80
109
  @hmes_frags = select_contigs(:hme_score)
81
110
  end
82
111
 
112
+ # From Assembly contig objects, contigs are selected based on user selected options
113
+ # for bulk frequency ratio (bfr_score)
83
114
  def bfr_frags
84
115
  unless defined?(@bfr_frags)
85
116
  @bfr_frags = select_contigs(:bfr_score)
@@ -87,14 +118,19 @@ module Cheripic
87
118
  @bfr_frags
88
119
  end
89
120
 
121
+ # Applies selection procedure on assembly contigs based on the ratio_type provided.
122
+ # If only_frag_with_vars is set to true then contigs without any variant are discarded for :hme_score
123
+ # while contigs without any hemisnps are discarded for :bfr_score
124
+ # If filter_out_low_hmes is set to true then contigs are further filtered based on a cut off value of the score
125
+ # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
90
126
  def select_contigs(ratio_type)
91
127
  selected_contigs ={}
92
- only_frag_with_vars = Options.params.only_frag_with_vars
128
+ only_frag_with_vars = Options.only_frag_with_vars
93
129
  @assembly.each_key do | frag |
94
130
  if only_frag_with_vars
95
131
  if ratio_type == :hme_score
96
132
  # selecting fragments which have a variant
97
- if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.params.hmes_adjust
133
+ if @assembly[frag].hm_num + @assembly[frag].ht_num > 2 * Options.hmes_adjust
98
134
  selected_contigs[frag] = @assembly[frag]
99
135
  end
100
136
  else # ratio_type == :bfr_score
@@ -116,6 +152,10 @@ module Cheripic
116
152
  selected_contigs
117
153
  end
118
154
 
155
+ # Filters out contigs below a cutoff for selected ratio_type
156
+ # a cutoff value is calculated based on ratio_type provided
157
+ # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
158
+ # @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash
119
159
  def filter_contigs(selected_contigs, ratio_type)
120
160
  cutoff = get_cutoff(selected_contigs, ratio_type)
121
161
  selected_contigs.each_key do | frag |
@@ -126,15 +166,19 @@ module Cheripic
126
166
  selected_contigs
127
167
  end
128
168
 
169
+ # Cut off value calculation used to filter out low scored contigs.
170
+ #
171
+ # @param ratio_type [Symbol] ratio_type is either :hme_score or :bfr_score
172
+ # @param selected_contigs [Hash] a hash of contigs with selected ratio_type, a subset of assembly hash
129
173
  def get_cutoff(selected_contigs, ratio_type)
130
- filter_out_low_hmes = Options.params.filter_out_low_hmes
174
+ filter_out_low_hmes = Options.filter_out_low_hmes
131
175
  # set minimum cut off hme_score or bfr_score to pick fragments with variants
132
176
  # calculate min hme score for back or out crossed data or bfr_score for polyploidy data
133
177
  # if no filtering applied set cutoff to 1.1
134
178
  if filter_out_low_hmes
135
179
  if ratio_type == :hme_score
136
- adjust = Options.params.hmes_adjust
137
- if Options.params.cross_type == 'back'
180
+ adjust = Options.hmes_adjust
181
+ if Options.cross_type == 'back'
138
182
  cutoff = (1.0/adjust) + 1.0
139
183
  else # outcross
140
184
  cutoff = (2.0/adjust) + 1.0
@@ -148,6 +192,9 @@ module Cheripic
148
192
  cutoff
149
193
  end
150
194
 
195
+ # Cut off value calculation for bfr contigs.
196
+ # ratio value at index 0.1% length of an array or at index zero of an array that contains decreasing order of bfr ratios
197
+ # @param selected_contigs [Hash] a hash of contigs with selected bfr score, a subset of assembly hash
151
198
  def bfr_cutoff(selected_contigs, prop=0.1)
152
199
  ratios = []
153
200
  selected_contigs.each_key do | frag |
@@ -162,8 +209,8 @@ module Cheripic
162
209
  ratios[index - 1]
163
210
  end
164
211
 
165
- # method is to discard homozygous variant positions for which background bulk
166
- # pileup shows proportion higher than 0.35 for variant allele/non-reference allele
212
+ # Method is to discard homozygous variant positions for which background bulk
213
+ # pileup shows a fraction value higher than 0.35 for variant allele/non-reference allele
167
214
  # a recessive variant is expected to have 1/3rd frequency in background bulk
168
215
  def verify_bg_bulk_pileup
169
216
  unless defined?(@hmes_frags)
@@ -1,3 +1,7 @@
1
1
  module Cheripic
2
- VERSION = '1.0.0'
2
+
3
+ # Sets the semantic version number for this module.
4
+ # Version number will be used in help messages and for generating gem.
5
+ VERSION = '1.1.0'
6
+
3
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cheripic
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shyam Rallapalli
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-20 00:00:00.000000000 Z
11
+ date: 2016-08-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: yell
@@ -70,6 +70,20 @@ dependencies:
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
72
  version: 1.5.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: bio-samtools
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 2.4.0
80
+ type: :runtime
81
+ prerelease: false
82
+ version_requirements: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - "~>"
85
+ - !ruby/object:Gem::Version
86
+ version: 2.4.0
73
87
  - !ruby/object:Gem::Dependency
74
88
  name: bio-gngm
75
89
  requirement: !ruby/object:Gem::Requirement
@@ -124,14 +138,14 @@ dependencies:
124
138
  requirements:
125
139
  - - "~>"
126
140
  - !ruby/object:Gem::Version
127
- version: '1.10'
141
+ version: 1.7.6
128
142
  type: :development
129
143
  prerelease: false
130
144
  version_requirements: !ruby/object:Gem::Requirement
131
145
  requirements:
132
146
  - - "~>"
133
147
  - !ruby/object:Gem::Version
134
- version: '1.10'
148
+ version: 1.7.6
135
149
  - !ruby/object:Gem::Dependency
136
150
  name: rake
137
151
  requirement: !ruby/object:Gem::Requirement
@@ -249,6 +263,7 @@ files:
249
263
  - LICENSE.txt
250
264
  - README.md
251
265
  - Rakefile
266
+ - bin/cheripic
252
267
  - bin/console
253
268
  - bin/setup
254
269
  - cheripic.gemspec
@@ -283,7 +298,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
283
298
  version: '0'
284
299
  requirements: []
285
300
  rubyforge_project:
286
- rubygems_version: 2.4.6
301
+ rubygems_version: 2.4.3
287
302
  signing_key:
288
303
  specification_version: 4
289
304
  summary: picks causative mutation from bulks segregant sequencing