BioDSL 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/BioDSL.gemspec +1 -1
  4. data/Gemfile +6 -0
  5. data/README.md +289 -155
  6. data/Rakefile +18 -16
  7. data/lib/BioDSL.rb +1 -1
  8. data/lib/BioDSL/cary.rb +78 -53
  9. data/lib/BioDSL/command.rb +2 -2
  10. data/lib/BioDSL/commands.rb +1 -1
  11. data/lib/BioDSL/commands/add_key.rb +1 -1
  12. data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
  13. data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
  14. data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
  15. data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
  16. data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
  17. data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
  18. data/lib/BioDSL/commands/classify_seq.rb +8 -8
  19. data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
  20. data/lib/BioDSL/commands/clip_primer.rb +7 -7
  21. data/lib/BioDSL/commands/cluster_otus.rb +5 -5
  22. data/lib/BioDSL/commands/collapse_otus.rb +2 -2
  23. data/lib/BioDSL/commands/collect_otus.rb +2 -2
  24. data/lib/BioDSL/commands/complement_seq.rb +4 -4
  25. data/lib/BioDSL/commands/count.rb +1 -1
  26. data/lib/BioDSL/commands/count_values.rb +2 -2
  27. data/lib/BioDSL/commands/degap_seq.rb +6 -7
  28. data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
  29. data/lib/BioDSL/commands/dump.rb +2 -2
  30. data/lib/BioDSL/commands/filter_rrna.rb +4 -4
  31. data/lib/BioDSL/commands/genecall.rb +7 -7
  32. data/lib/BioDSL/commands/grab.rb +1 -1
  33. data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
  34. data/lib/BioDSL/commands/mask_seq.rb +4 -4
  35. data/lib/BioDSL/commands/mean_scores.rb +2 -2
  36. data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
  37. data/lib/BioDSL/commands/merge_table.rb +1 -1
  38. data/lib/BioDSL/commands/merge_values.rb +1 -1
  39. data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
  40. data/lib/BioDSL/commands/plot_histogram.rb +4 -4
  41. data/lib/BioDSL/commands/plot_matches.rb +5 -5
  42. data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
  43. data/lib/BioDSL/commands/plot_scores.rb +7 -7
  44. data/lib/BioDSL/commands/random.rb +1 -1
  45. data/lib/BioDSL/commands/read_fasta.rb +9 -9
  46. data/lib/BioDSL/commands/read_fastq.rb +16 -16
  47. data/lib/BioDSL/commands/read_table.rb +2 -3
  48. data/lib/BioDSL/commands/reverse_seq.rb +4 -4
  49. data/lib/BioDSL/commands/slice_align.rb +4 -4
  50. data/lib/BioDSL/commands/slice_seq.rb +3 -3
  51. data/lib/BioDSL/commands/sort.rb +1 -1
  52. data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
  53. data/lib/BioDSL/commands/split_values.rb +2 -2
  54. data/lib/BioDSL/commands/trim_primer.rb +13 -8
  55. data/lib/BioDSL/commands/trim_seq.rb +5 -5
  56. data/lib/BioDSL/commands/uchime_ref.rb +6 -6
  57. data/lib/BioDSL/commands/uclust.rb +5 -5
  58. data/lib/BioDSL/commands/unique_values.rb +1 -1
  59. data/lib/BioDSL/commands/usearch_global.rb +2 -2
  60. data/lib/BioDSL/commands/usearch_local.rb +2 -2
  61. data/lib/BioDSL/commands/write_fasta.rb +7 -9
  62. data/lib/BioDSL/commands/write_fastq.rb +4 -4
  63. data/lib/BioDSL/commands/write_table.rb +3 -3
  64. data/lib/BioDSL/commands/write_tree.rb +2 -3
  65. data/lib/BioDSL/config.rb +2 -2
  66. data/lib/BioDSL/csv.rb +8 -10
  67. data/lib/BioDSL/debug.rb +1 -1
  68. data/lib/BioDSL/fasta.rb +54 -40
  69. data/lib/BioDSL/fastq.rb +35 -32
  70. data/lib/BioDSL/filesys.rb +56 -47
  71. data/lib/BioDSL/fork.rb +1 -1
  72. data/lib/BioDSL/hamming.rb +1 -1
  73. data/lib/BioDSL/helpers.rb +1 -1
  74. data/lib/BioDSL/helpers/aux_helper.rb +1 -1
  75. data/lib/BioDSL/helpers/email_helper.rb +1 -1
  76. data/lib/BioDSL/helpers/history_helper.rb +1 -1
  77. data/lib/BioDSL/helpers/log_helper.rb +1 -1
  78. data/lib/BioDSL/helpers/options_helper.rb +1 -1
  79. data/lib/BioDSL/helpers/status_helper.rb +1 -1
  80. data/lib/BioDSL/html_report.rb +1 -1
  81. data/lib/BioDSL/math.rb +1 -1
  82. data/lib/BioDSL/mummer.rb +1 -1
  83. data/lib/BioDSL/pipeline.rb +1 -1
  84. data/lib/BioDSL/seq.rb +240 -231
  85. data/lib/BioDSL/seq/ambiguity.rb +1 -1
  86. data/lib/BioDSL/seq/assemble.rb +1 -1
  87. data/lib/BioDSL/seq/backtrack.rb +93 -76
  88. data/lib/BioDSL/seq/digest.rb +1 -1
  89. data/lib/BioDSL/seq/dynamic.rb +43 -55
  90. data/lib/BioDSL/seq/homopolymer.rb +34 -36
  91. data/lib/BioDSL/seq/kmer.rb +67 -50
  92. data/lib/BioDSL/seq/levenshtein.rb +35 -40
  93. data/lib/BioDSL/seq/translate.rb +64 -55
  94. data/lib/BioDSL/seq/trim.rb +60 -50
  95. data/lib/BioDSL/serializer.rb +1 -1
  96. data/lib/BioDSL/stream.rb +1 -1
  97. data/lib/BioDSL/taxonomy.rb +1 -1
  98. data/lib/BioDSL/test.rb +1 -1
  99. data/lib/BioDSL/tmp_dir.rb +1 -1
  100. data/lib/BioDSL/usearch.rb +1 -1
  101. data/lib/BioDSL/verbose.rb +1 -1
  102. data/lib/BioDSL/version.rb +2 -2
  103. data/test/BioDSL/commands/test_add_key.rb +1 -1
  104. data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
  105. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
  106. data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
  107. data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
  108. data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
  109. data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
  110. data/test/BioDSL/commands/test_classify_seq.rb +1 -1
  111. data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
  112. data/test/BioDSL/commands/test_clip_primer.rb +1 -1
  113. data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
  114. data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
  115. data/test/BioDSL/commands/test_collect_otus.rb +1 -1
  116. data/test/BioDSL/commands/test_complement_seq.rb +1 -1
  117. data/test/BioDSL/commands/test_count.rb +1 -1
  118. data/test/BioDSL/commands/test_count_values.rb +1 -1
  119. data/test/BioDSL/commands/test_degap_seq.rb +1 -1
  120. data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
  121. data/test/BioDSL/commands/test_dump.rb +1 -1
  122. data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
  123. data/test/BioDSL/commands/test_genecall.rb +1 -1
  124. data/test/BioDSL/commands/test_grab.rb +1 -1
  125. data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
  126. data/test/BioDSL/commands/test_mask_seq.rb +1 -1
  127. data/test/BioDSL/commands/test_mean_scores.rb +1 -1
  128. data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
  129. data/test/BioDSL/commands/test_merge_table.rb +1 -1
  130. data/test/BioDSL/commands/test_merge_values.rb +1 -1
  131. data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
  132. data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
  133. data/test/BioDSL/commands/test_plot_matches.rb +1 -1
  134. data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
  135. data/test/BioDSL/commands/test_plot_scores.rb +1 -1
  136. data/test/BioDSL/commands/test_random.rb +1 -1
  137. data/test/BioDSL/commands/test_read_fasta.rb +1 -1
  138. data/test/BioDSL/commands/test_read_fastq.rb +1 -1
  139. data/test/BioDSL/commands/test_read_table.rb +1 -1
  140. data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
  141. data/test/BioDSL/commands/test_slice_align.rb +1 -1
  142. data/test/BioDSL/commands/test_slice_seq.rb +1 -1
  143. data/test/BioDSL/commands/test_sort.rb +1 -1
  144. data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
  145. data/test/BioDSL/commands/test_split_values.rb +1 -1
  146. data/test/BioDSL/commands/test_trim_primer.rb +1 -1
  147. data/test/BioDSL/commands/test_trim_seq.rb +1 -1
  148. data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
  149. data/test/BioDSL/commands/test_uclust.rb +1 -1
  150. data/test/BioDSL/commands/test_unique_values.rb +1 -1
  151. data/test/BioDSL/commands/test_usearch_global.rb +1 -1
  152. data/test/BioDSL/commands/test_usearch_local.rb +1 -1
  153. data/test/BioDSL/commands/test_write_fasta.rb +1 -1
  154. data/test/BioDSL/commands/test_write_fastq.rb +1 -1
  155. data/test/BioDSL/commands/test_write_table.rb +1 -1
  156. data/test/BioDSL/commands/test_write_tree.rb +1 -1
  157. data/test/BioDSL/helpers/test_options_helper.rb +3 -3
  158. data/test/BioDSL/seq/test_assemble.rb +58 -56
  159. data/test/BioDSL/seq/test_backtrack.rb +83 -81
  160. data/test/BioDSL/seq/test_digest.rb +47 -45
  161. data/test/BioDSL/seq/test_dynamic.rb +66 -64
  162. data/test/BioDSL/seq/test_homopolymer.rb +35 -33
  163. data/test/BioDSL/seq/test_kmer.rb +29 -28
  164. data/test/BioDSL/seq/test_translate.rb +44 -42
  165. data/test/BioDSL/seq/test_trim.rb +59 -57
  166. data/test/BioDSL/test_cary.rb +1 -1
  167. data/test/BioDSL/test_command.rb +2 -2
  168. data/test/BioDSL/test_csv.rb +34 -31
  169. data/test/BioDSL/test_debug.rb +31 -31
  170. data/test/BioDSL/test_fasta.rb +30 -29
  171. data/test/BioDSL/test_fastq.rb +27 -26
  172. data/test/BioDSL/test_filesys.rb +28 -27
  173. data/test/BioDSL/test_fork.rb +29 -28
  174. data/test/BioDSL/test_math.rb +31 -30
  175. data/test/BioDSL/test_mummer.rb +1 -1
  176. data/test/BioDSL/test_pipeline.rb +1 -1
  177. data/test/BioDSL/test_seq.rb +42 -41
  178. data/test/BioDSL/test_serializer.rb +35 -33
  179. data/test/BioDSL/test_stream.rb +28 -27
  180. data/test/BioDSL/test_taxonomy.rb +38 -37
  181. data/test/BioDSL/test_test.rb +32 -31
  182. data/test/BioDSL/test_tmp_dir.rb +1 -1
  183. data/test/BioDSL/test_usearch.rb +28 -27
  184. data/test/BioDSL/test_verbose.rb +32 -31
  185. data/test/helper.rb +34 -31
  186. metadata +3 -2
@@ -21,13 +21,11 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
28
28
  module BioDSL
29
- # rubocop:disable ClassLength
30
-
31
29
  # == Assemble sequences the stream using IDBA_UD.
32
30
  #
33
31
  # +assemble_seq_idba+ is a wrapper around the prokaryotic metagenome
@@ -99,7 +97,7 @@ module BioDSL
99
97
  TmpDir.create('reads.fna', 'contig.fa') do |fa_in, fa_out, tmp_dir|
100
98
  process_input(input, output, fa_in)
101
99
  execute_idba(fa_in, tmp_dir)
102
- lengths = process_output(output, fa_out)
100
+ process_output(output, fa_out)
103
101
  end
104
102
 
105
103
  calc_n50(status)
@@ -123,7 +121,7 @@ module BioDSL
123
121
  def defaults
124
122
  @options[:kmer_min] ||= 24
125
123
  @options[:kmer_max] ||= 48
126
- @options[:cpus] ||= 1
124
+ @options[:cpus] ||= 1
127
125
  end
128
126
 
129
127
  # Read all records from input and emit non-sequence records to the output
@@ -141,7 +139,7 @@ module BioDSL
141
139
  entry = BioDSL::Seq.new_bp(record)
142
140
 
143
141
  @status[:sequences_in] += 1
144
- @status[:residues_in] += entry.length
142
+ @status[:residues_in] += entry.length
145
143
 
146
144
  fasta_io.puts entry.to_fasta
147
145
  else
@@ -193,9 +191,9 @@ module BioDSL
193
191
  BioDSL::Fasta.open(fa_out, 'r') do |ios|
194
192
  ios.each do |entry|
195
193
  output << entry.to_bp
196
- @status[:records_out] += 1
194
+ @status[:records_out] += 1
197
195
  @status[:sequences_out] += 1
198
- @status[:residues_out] += entry.length
196
+ @status[:residues_out] += entry.length
199
197
 
200
198
  @lengths << entry.length
201
199
  end
@@ -212,7 +210,7 @@ module BioDSL
212
210
  @lengths.reverse!
213
211
 
214
212
  status[:contig_max] = @lengths.first || 0
215
- status[:contig_min] = @lengths.last || 0
213
+ status[:contig_min] = @lengths.last || 0
216
214
  status[:contig_n50] = 0
217
215
 
218
216
  count = 0
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -164,10 +164,10 @@ module BioDSL
164
164
 
165
165
  # Set the default option values.
166
166
  def defaults
167
- @options[:kmer_min] ||= 21
168
- @options[:kmer_max] ||= 49
167
+ @options[:kmer_min] ||= 21
168
+ @options[:kmer_max] ||= 49
169
169
  @options[:contig_min] ||= 500
170
- @options[:cpus] ||= 1
170
+ @options[:cpus] ||= 1
171
171
  end
172
172
 
173
173
  # Read all records from input and emit non-sequence records to the output
@@ -185,7 +185,7 @@ module BioDSL
185
185
  entry = BioDSL::Seq.new_bp(record)
186
186
 
187
187
  @status[:sequences_in] += 1
188
- @status[:residues_in] += entry.length
188
+ @status[:residues_in] += entry.length
189
189
 
190
190
  fasta_io.puts entry.to_fasta
191
191
  else
@@ -314,11 +314,11 @@ module BioDSL
314
314
  next if entry.length < @options[:contig_min]
315
315
 
316
316
  lengths << entry.length
317
- output << entry.to_bp
317
+ output << entry.to_bp
318
318
 
319
- @status[:records_out] += 1
319
+ @status[:records_out] += 1
320
320
  @status[:sequences_out] += 1
321
- @status[:residues_out] += entry.length
321
+ @status[:residues_out] += entry.length
322
322
  end
323
323
  end
324
324
 
@@ -333,11 +333,11 @@ module BioDSL
333
333
  @status[:kmer] = kmer
334
334
  @status[:paired] = @paired
335
335
 
336
- unless lengths.empty?
337
- @status[:contig_min] = lengths.min
338
- @status[:contig_max] = lengths.max
339
- @status[:n50] = calc_n50(lengths)
340
- end
336
+ return if lengths.empty?
337
+
338
+ @status[:contig_min] = lengths.min
339
+ @status[:contig_max] = lengths.max
340
+ @status[:n50] = calc_n50(lengths)
341
341
  end
342
342
 
343
343
  N50 = Struct.new(:kmer, :n50)
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -160,7 +160,7 @@ module BioDSL
160
160
  entry = BioDSL::Seq.new_bp(record)
161
161
 
162
162
  @status[:sequences_in] += 1
163
- @status[:residues_in] += entry.length
163
+ @status[:residues_in] += entry.length
164
164
 
165
165
  if entry.qual
166
166
  @type = :fastq
@@ -216,9 +216,9 @@ module BioDSL
216
216
  BioDSL::Fasta.open(output_file) do |ios|
217
217
  ios.each do |entry|
218
218
  output << entry.to_bp
219
- @status[:records_out] += 1
219
+ @status[:records_out] += 1
220
220
  @status[:sequences_out] += 1
221
- @status[:residues_out] += entry.length
221
+ @status[:residues_out] += entry.length
222
222
 
223
223
  @lengths << entry.length
224
224
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -185,12 +185,12 @@ module BioDSL
185
185
 
186
186
  # Set default options.
187
187
  def defaults
188
- @options[:prefix] ||= 'taxonomy'
188
+ @options[:prefix] ||= 'taxonomy'
189
189
  @options[:kmer_size] ||= 8
190
190
  @options[:step_size] ||= 1
191
- @options[:hits_max] ||= 50
191
+ @options[:hits_max] ||= 50
192
192
  @options[:consensus] ||= 0.51
193
- @options[:coverage] ||= 0.9
193
+ @options[:coverage] ||= 0.9
194
194
  @options[:best_only] = true if @options[:best_only].nil?
195
195
  end
196
196
 
@@ -200,14 +200,14 @@ module BioDSL
200
200
  # @param i [Fixnum] Record number,
201
201
  # @param search [BioDSL::Taxonomy::Search] Search object.
202
202
  def classify_seq(record, i, search)
203
- @status[:sequences_in] += 1
203
+ @status[:sequences_in] += 1
204
204
  @status[:sequences_out] += 1
205
- @status[:residues_in] += record[:SEQ].length
206
- @status[:residues_out] += record[:SEQ].length
205
+ @status[:residues_in] += record[:SEQ].length
206
+ @status[:residues_out] += record[:SEQ].length
207
207
  seq_name = record[:SEQ_NAME] || i.to_s
208
208
 
209
209
  result = search.execute(BioDSL::Seq.new(seq_name: seq_name,
210
- seq: record[:SEQ]))
210
+ seq: record[:SEQ]))
211
211
 
212
212
  record[:TAXONOMY] = result.taxonomy
213
213
  record[:TAXONOMY_HITS] = result.hits
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -128,7 +128,7 @@ module BioDSL
128
128
  # Set default options.
129
129
  def defaults
130
130
  @options[:confidence] ||= 80
131
- @options[:cpus] ||= 1
131
+ @options[:cpus] ||= 1
132
132
  end
133
133
 
134
134
  # Process input data and save sequences to a temporary file for
@@ -143,10 +143,10 @@ module BioDSL
143
143
  @status[:records_in] += 1
144
144
 
145
145
  if record[:SEQ]
146
- @status[:sequences_in] += 1
146
+ @status[:sequences_in] += 1
147
147
  @status[:sequences_out] += 1
148
- @status[:residues_in] += record[:SEQ].length
149
- @status[:records_out] += record[:SEQ].length
148
+ @status[:residues_in] += record[:SEQ].length
149
+ @status[:records_out] += record[:SEQ].length
150
150
  seq_name = record[:SEQ_NAME] || i.to_s
151
151
 
152
152
  entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -166,16 +166,16 @@ module BioDSL
166
166
 
167
167
  # Set default option values.
168
168
  def defaults
169
- @options[:mismatch_percent] ||= 0
169
+ @options[:mismatch_percent] ||= 0
170
170
  @options[:insertion_percent] ||= 0
171
- @options[:deletion_percent] ||= 0
171
+ @options[:deletion_percent] ||= 0
172
172
  end
173
173
 
174
174
  # Calculate the mismatch percentage.
175
175
  #
176
176
  # @return [Float] Mismatch percentage.
177
177
  def calc_mis
178
- (@primer.length * @options[:mismatch_percent] * 0.01).round
178
+ (@primer.length * @options[:mismatch_percent] * 0.01).round
179
179
  end
180
180
 
181
181
  # Calculate the insertion percentage.
@@ -189,7 +189,7 @@ module BioDSL
189
189
  #
190
190
  # @return [Float] Deletion percentage.
191
191
  def calc_del
192
- (@primer.length * @options[:deletion_percent] * 0.01).round
192
+ (@primer.length * @options[:deletion_percent] * 0.01).round
193
193
  end
194
194
 
195
195
  # Reset any previous clip_primer results from record.
@@ -207,7 +207,7 @@ module BioDSL
207
207
  entry = BioDSL::Seq.new_bp(record)
208
208
 
209
209
  @status[:sequences_in] += 1
210
- @status[:residues_in] += entry.length
210
+ @status[:residues_in] += entry.length
211
211
 
212
212
  case @options[:direction]
213
213
  when :forward then clip_primer_forward(record, entry)
@@ -217,7 +217,7 @@ module BioDSL
217
217
  end
218
218
 
219
219
  @status[:sequences_out] += 1
220
- @status[:residues_out] += entry.length
220
+ @status[:residues_out] += entry.length
221
221
  end
222
222
 
223
223
  # Clip forward primer from entry and save clip information
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -89,8 +89,8 @@ module BioDSL
89
89
  process_input(input, output, tmp_in)
90
90
 
91
91
  BioDSL::Usearch.cluster_otus(input: tmp_in, output: tmp_out,
92
- identity: @options[:identity],
93
- verbose: @options[:verbose])
92
+ identity: @options[:identity],
93
+ verbose: @options[:verbose])
94
94
 
95
95
  process_output(output, tmp_out)
96
96
  end
@@ -172,8 +172,8 @@ module BioDSL
172
172
 
173
173
  output << record
174
174
  @status[:sequences_out] += 1
175
- @status[:residues_out] += record[:SEQ].length
176
- @status[:records_out] += 1
175
+ @status[:residues_out] += record[:SEQ].length
176
+ @status[:records_out] += 1
177
177
  end
178
178
  end
179
179
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -162,7 +162,7 @@ module BioDSL
162
162
  def write_tax(hash, output)
163
163
  hash.each_value do |record|
164
164
  output << record
165
- @status[:otus_out] += 1
165
+ @status[:otus_out] += 1
166
166
  @status[:records_out] += 1
167
167
  end
168
168
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -142,7 +142,7 @@ module BioDSL
142
142
 
143
143
  output << record
144
144
 
145
- @status[:hits_out] += 1
145
+ @status[:hits_out] += 1
146
146
  @status[:records_out] += 1
147
147
  end
148
148
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -106,10 +106,10 @@ module BioDSL
106
106
  entry.type = @type
107
107
  entry.complement!
108
108
 
109
- @status[:sequences_in] += 1
109
+ @status[:sequences_in] += 1
110
110
  @status[:sequences_out] += 1
111
- @status[:residues_in] += entry.length
112
- @status[:residues_out] += entry.length
111
+ @status[:residues_in] += entry.length
112
+ @status[:residues_out] += entry.length
113
113
 
114
114
  record.merge! entry.to_bp
115
115
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -74,7 +74,7 @@ module BioDSL
74
74
  #
75
75
  # @return [CountValues] Instance of class.
76
76
  def initialize(options)
77
- @options = options
77
+ @options = options
78
78
 
79
79
  check_options
80
80
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -68,7 +68,6 @@ module BioDSL
68
68
  # {:SEQ_NAME=>"test1", :SEQ=>"A-GTC", :SEQ_LEN=>5}
69
69
  # {:SEQ_NAME=>"test2", :SEQ=>"AGGTC", :SEQ_LEN=>5}
70
70
  #
71
- # rubocop:disable ClassLength
72
71
  class DegapSeq
73
72
  require 'narray'
74
73
 
@@ -157,14 +156,14 @@ module BioDSL
157
156
  # @param seq [String] Sequences.
158
157
  def mask_add(seq)
159
158
  @status[:sequences_in] += 1
160
- @status[:residues_in] += seq.length
159
+ @status[:residues_in] += seq.length
161
160
 
162
161
  @max_len ||= seq.length
163
162
 
164
163
  check_length(seq)
165
164
 
166
165
  @na_mask ||= NArray.int(seq.length)
167
- na_seq = NArray.to_na(seq, 'byte')
166
+ na_seq = NArray.to_na(seq, 'byte')
168
167
  @indels.each_char { |c| @na_mask += na_seq.eq(c.ord) }
169
168
  end
170
169
 
@@ -212,7 +211,7 @@ module BioDSL
212
211
  record[:SEQ_LEN] = record[:SEQ].length
213
212
 
214
213
  @status[:sequences_out] += 1
215
- @status[:residues_out] += record[:SEQ].length
214
+ @status[:residues_out] += record[:SEQ].length
216
215
  end
217
216
 
218
217
  # Remove all gaps from all sequences in input stream and output to output
@@ -240,12 +239,12 @@ module BioDSL
240
239
  entry = BioDSL::Seq.new_bp(record)
241
240
 
242
241
  @status[:sequences_in] += 1
243
- @status[:residues_in] += entry.length
242
+ @status[:residues_in] += entry.length
244
243
 
245
244
  entry.seq.delete!(@indels)
246
245
 
247
246
  @status[:sequences_out] += 1
248
- @status[:residues_out] += entry.length
247
+ @status[:residues_out] += entry.length
249
248
 
250
249
  record.merge! entry.to_bp
251
250
  end