BioDSL 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/BioDSL.gemspec +1 -1
  4. data/Gemfile +6 -0
  5. data/README.md +289 -155
  6. data/Rakefile +18 -16
  7. data/lib/BioDSL.rb +1 -1
  8. data/lib/BioDSL/cary.rb +78 -53
  9. data/lib/BioDSL/command.rb +2 -2
  10. data/lib/BioDSL/commands.rb +1 -1
  11. data/lib/BioDSL/commands/add_key.rb +1 -1
  12. data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
  13. data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
  14. data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
  15. data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
  16. data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
  17. data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
  18. data/lib/BioDSL/commands/classify_seq.rb +8 -8
  19. data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
  20. data/lib/BioDSL/commands/clip_primer.rb +7 -7
  21. data/lib/BioDSL/commands/cluster_otus.rb +5 -5
  22. data/lib/BioDSL/commands/collapse_otus.rb +2 -2
  23. data/lib/BioDSL/commands/collect_otus.rb +2 -2
  24. data/lib/BioDSL/commands/complement_seq.rb +4 -4
  25. data/lib/BioDSL/commands/count.rb +1 -1
  26. data/lib/BioDSL/commands/count_values.rb +2 -2
  27. data/lib/BioDSL/commands/degap_seq.rb +6 -7
  28. data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
  29. data/lib/BioDSL/commands/dump.rb +2 -2
  30. data/lib/BioDSL/commands/filter_rrna.rb +4 -4
  31. data/lib/BioDSL/commands/genecall.rb +7 -7
  32. data/lib/BioDSL/commands/grab.rb +1 -1
  33. data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
  34. data/lib/BioDSL/commands/mask_seq.rb +4 -4
  35. data/lib/BioDSL/commands/mean_scores.rb +2 -2
  36. data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
  37. data/lib/BioDSL/commands/merge_table.rb +1 -1
  38. data/lib/BioDSL/commands/merge_values.rb +1 -1
  39. data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
  40. data/lib/BioDSL/commands/plot_histogram.rb +4 -4
  41. data/lib/BioDSL/commands/plot_matches.rb +5 -5
  42. data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
  43. data/lib/BioDSL/commands/plot_scores.rb +7 -7
  44. data/lib/BioDSL/commands/random.rb +1 -1
  45. data/lib/BioDSL/commands/read_fasta.rb +9 -9
  46. data/lib/BioDSL/commands/read_fastq.rb +16 -16
  47. data/lib/BioDSL/commands/read_table.rb +2 -3
  48. data/lib/BioDSL/commands/reverse_seq.rb +4 -4
  49. data/lib/BioDSL/commands/slice_align.rb +4 -4
  50. data/lib/BioDSL/commands/slice_seq.rb +3 -3
  51. data/lib/BioDSL/commands/sort.rb +1 -1
  52. data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
  53. data/lib/BioDSL/commands/split_values.rb +2 -2
  54. data/lib/BioDSL/commands/trim_primer.rb +13 -8
  55. data/lib/BioDSL/commands/trim_seq.rb +5 -5
  56. data/lib/BioDSL/commands/uchime_ref.rb +6 -6
  57. data/lib/BioDSL/commands/uclust.rb +5 -5
  58. data/lib/BioDSL/commands/unique_values.rb +1 -1
  59. data/lib/BioDSL/commands/usearch_global.rb +2 -2
  60. data/lib/BioDSL/commands/usearch_local.rb +2 -2
  61. data/lib/BioDSL/commands/write_fasta.rb +7 -9
  62. data/lib/BioDSL/commands/write_fastq.rb +4 -4
  63. data/lib/BioDSL/commands/write_table.rb +3 -3
  64. data/lib/BioDSL/commands/write_tree.rb +2 -3
  65. data/lib/BioDSL/config.rb +2 -2
  66. data/lib/BioDSL/csv.rb +8 -10
  67. data/lib/BioDSL/debug.rb +1 -1
  68. data/lib/BioDSL/fasta.rb +54 -40
  69. data/lib/BioDSL/fastq.rb +35 -32
  70. data/lib/BioDSL/filesys.rb +56 -47
  71. data/lib/BioDSL/fork.rb +1 -1
  72. data/lib/BioDSL/hamming.rb +1 -1
  73. data/lib/BioDSL/helpers.rb +1 -1
  74. data/lib/BioDSL/helpers/aux_helper.rb +1 -1
  75. data/lib/BioDSL/helpers/email_helper.rb +1 -1
  76. data/lib/BioDSL/helpers/history_helper.rb +1 -1
  77. data/lib/BioDSL/helpers/log_helper.rb +1 -1
  78. data/lib/BioDSL/helpers/options_helper.rb +1 -1
  79. data/lib/BioDSL/helpers/status_helper.rb +1 -1
  80. data/lib/BioDSL/html_report.rb +1 -1
  81. data/lib/BioDSL/math.rb +1 -1
  82. data/lib/BioDSL/mummer.rb +1 -1
  83. data/lib/BioDSL/pipeline.rb +1 -1
  84. data/lib/BioDSL/seq.rb +240 -231
  85. data/lib/BioDSL/seq/ambiguity.rb +1 -1
  86. data/lib/BioDSL/seq/assemble.rb +1 -1
  87. data/lib/BioDSL/seq/backtrack.rb +93 -76
  88. data/lib/BioDSL/seq/digest.rb +1 -1
  89. data/lib/BioDSL/seq/dynamic.rb +43 -55
  90. data/lib/BioDSL/seq/homopolymer.rb +34 -36
  91. data/lib/BioDSL/seq/kmer.rb +67 -50
  92. data/lib/BioDSL/seq/levenshtein.rb +35 -40
  93. data/lib/BioDSL/seq/translate.rb +64 -55
  94. data/lib/BioDSL/seq/trim.rb +60 -50
  95. data/lib/BioDSL/serializer.rb +1 -1
  96. data/lib/BioDSL/stream.rb +1 -1
  97. data/lib/BioDSL/taxonomy.rb +1 -1
  98. data/lib/BioDSL/test.rb +1 -1
  99. data/lib/BioDSL/tmp_dir.rb +1 -1
  100. data/lib/BioDSL/usearch.rb +1 -1
  101. data/lib/BioDSL/verbose.rb +1 -1
  102. data/lib/BioDSL/version.rb +2 -2
  103. data/test/BioDSL/commands/test_add_key.rb +1 -1
  104. data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
  105. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
  106. data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
  107. data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
  108. data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
  109. data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
  110. data/test/BioDSL/commands/test_classify_seq.rb +1 -1
  111. data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
  112. data/test/BioDSL/commands/test_clip_primer.rb +1 -1
  113. data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
  114. data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
  115. data/test/BioDSL/commands/test_collect_otus.rb +1 -1
  116. data/test/BioDSL/commands/test_complement_seq.rb +1 -1
  117. data/test/BioDSL/commands/test_count.rb +1 -1
  118. data/test/BioDSL/commands/test_count_values.rb +1 -1
  119. data/test/BioDSL/commands/test_degap_seq.rb +1 -1
  120. data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
  121. data/test/BioDSL/commands/test_dump.rb +1 -1
  122. data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
  123. data/test/BioDSL/commands/test_genecall.rb +1 -1
  124. data/test/BioDSL/commands/test_grab.rb +1 -1
  125. data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
  126. data/test/BioDSL/commands/test_mask_seq.rb +1 -1
  127. data/test/BioDSL/commands/test_mean_scores.rb +1 -1
  128. data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
  129. data/test/BioDSL/commands/test_merge_table.rb +1 -1
  130. data/test/BioDSL/commands/test_merge_values.rb +1 -1
  131. data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
  132. data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
  133. data/test/BioDSL/commands/test_plot_matches.rb +1 -1
  134. data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
  135. data/test/BioDSL/commands/test_plot_scores.rb +1 -1
  136. data/test/BioDSL/commands/test_random.rb +1 -1
  137. data/test/BioDSL/commands/test_read_fasta.rb +1 -1
  138. data/test/BioDSL/commands/test_read_fastq.rb +1 -1
  139. data/test/BioDSL/commands/test_read_table.rb +1 -1
  140. data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
  141. data/test/BioDSL/commands/test_slice_align.rb +1 -1
  142. data/test/BioDSL/commands/test_slice_seq.rb +1 -1
  143. data/test/BioDSL/commands/test_sort.rb +1 -1
  144. data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
  145. data/test/BioDSL/commands/test_split_values.rb +1 -1
  146. data/test/BioDSL/commands/test_trim_primer.rb +1 -1
  147. data/test/BioDSL/commands/test_trim_seq.rb +1 -1
  148. data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
  149. data/test/BioDSL/commands/test_uclust.rb +1 -1
  150. data/test/BioDSL/commands/test_unique_values.rb +1 -1
  151. data/test/BioDSL/commands/test_usearch_global.rb +1 -1
  152. data/test/BioDSL/commands/test_usearch_local.rb +1 -1
  153. data/test/BioDSL/commands/test_write_fasta.rb +1 -1
  154. data/test/BioDSL/commands/test_write_fastq.rb +1 -1
  155. data/test/BioDSL/commands/test_write_table.rb +1 -1
  156. data/test/BioDSL/commands/test_write_tree.rb +1 -1
  157. data/test/BioDSL/helpers/test_options_helper.rb +3 -3
  158. data/test/BioDSL/seq/test_assemble.rb +58 -56
  159. data/test/BioDSL/seq/test_backtrack.rb +83 -81
  160. data/test/BioDSL/seq/test_digest.rb +47 -45
  161. data/test/BioDSL/seq/test_dynamic.rb +66 -64
  162. data/test/BioDSL/seq/test_homopolymer.rb +35 -33
  163. data/test/BioDSL/seq/test_kmer.rb +29 -28
  164. data/test/BioDSL/seq/test_translate.rb +44 -42
  165. data/test/BioDSL/seq/test_trim.rb +59 -57
  166. data/test/BioDSL/test_cary.rb +1 -1
  167. data/test/BioDSL/test_command.rb +2 -2
  168. data/test/BioDSL/test_csv.rb +34 -31
  169. data/test/BioDSL/test_debug.rb +31 -31
  170. data/test/BioDSL/test_fasta.rb +30 -29
  171. data/test/BioDSL/test_fastq.rb +27 -26
  172. data/test/BioDSL/test_filesys.rb +28 -27
  173. data/test/BioDSL/test_fork.rb +29 -28
  174. data/test/BioDSL/test_math.rb +31 -30
  175. data/test/BioDSL/test_mummer.rb +1 -1
  176. data/test/BioDSL/test_pipeline.rb +1 -1
  177. data/test/BioDSL/test_seq.rb +42 -41
  178. data/test/BioDSL/test_serializer.rb +35 -33
  179. data/test/BioDSL/test_stream.rb +28 -27
  180. data/test/BioDSL/test_taxonomy.rb +38 -37
  181. data/test/BioDSL/test_test.rb +32 -31
  182. data/test/BioDSL/test_tmp_dir.rb +1 -1
  183. data/test/BioDSL/test_usearch.rb +28 -27
  184. data/test/BioDSL/test_verbose.rb +32 -31
  185. data/test/helper.rb +34 -31
  186. metadata +3 -2
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
24
+ # This software is part of BioDSL (http://maasha.github.io/BioDSL). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -143,7 +143,7 @@ module BioDSL
143
143
  case
144
144
  when @options[:first] && @pair then read_first_pair(output)
145
145
  when @options[:first] then read_first_single(output)
146
- when @options[:last] && @pair then read_last_pair(output)
146
+ when @options[:last] && @pair then read_last_pair(output)
147
147
  when @options[:last] then read_last_single(output)
148
148
  when @pair then read_all_pair(output)
149
149
  else
@@ -176,12 +176,12 @@ module BioDSL
176
176
  return unless input
177
177
 
178
178
  input.each do |record|
179
- @status[:records_in] += 1
179
+ @status[:records_in] += 1
180
180
  @status[:records_out] += 1
181
181
 
182
182
  if (seq = record[:SEQ])
183
183
  @status[:sequences_in] += 1
184
- @status[:residues_in] += seq.length
184
+ @status[:residues_in] += seq.length
185
185
  end
186
186
 
187
187
  output << record
@@ -197,10 +197,10 @@ module BioDSL
197
197
  ios.each do |entry|
198
198
  check_entry(entry)
199
199
  output << entry.to_bp
200
- @status[:records_out] += 1
200
+ @status[:records_out] += 1
201
201
  @status[:sequences_out] += 1
202
- @status[:residues_out] += entry.length
203
- return if @status[:sequences_out] >= @options[:first]
202
+ @status[:residues_out] += entry.length
203
+ break if @status[:sequences_out] >= @options[:first]
204
204
  end
205
205
  end
206
206
  end
@@ -220,10 +220,10 @@ module BioDSL
220
220
  reverse_complement(entry2) if @options[:reverse_complement]
221
221
  output << entry1.to_bp
222
222
  output << entry2.to_bp
223
- @status[:records_out] += 2
223
+ @status[:records_out] += 2
224
224
  @status[:sequences_out] += 2
225
- @status[:residues_out] += entry1.length + entry2.length
226
- return if @status[:sequences_out] >= @options[:first]
225
+ @status[:residues_out] += entry1.length + entry2.length
226
+ break if @status[:sequences_out] >= @options[:first]
227
227
  end
228
228
  end
229
229
  end
@@ -279,9 +279,9 @@ module BioDSL
279
279
  ios.each do |entry|
280
280
  check_entry(entry)
281
281
  output << entry.to_bp
282
- @status[:records_out] += 1
282
+ @status[:records_out] += 1
283
283
  @status[:sequences_out] += 1
284
- @status[:residues_out] += entry.length
284
+ @status[:residues_out] += entry.length
285
285
  end
286
286
  end
287
287
  end
@@ -299,9 +299,9 @@ module BioDSL
299
299
  reverse_complement(entry2) if @options[:reverse_complement]
300
300
  output << entry1.to_bp
301
301
  output << entry2.to_bp
302
- @status[:records_out] += 2
302
+ @status[:records_out] += 2
303
303
  @status[:sequences_out] += 2
304
- @status[:residues_out] += entry1.length + entry2.length
304
+ @status[:residues_out] += entry1.length + entry2.length
305
305
  end
306
306
  end
307
307
  end
@@ -405,9 +405,9 @@ module BioDSL
405
405
  @buffer.each do |entry|
406
406
  output << entry.to_bp
407
407
 
408
- @status[:records_out] += 1
408
+ @status[:records_out] += 1
409
409
  @status[:sequences_out] += 1
410
- @status[:residues_out] += entry.length
410
+ @status[:residues_out] += entry.length
411
411
  end
412
412
  end
413
413
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -173,7 +173,6 @@ module BioDSL
173
173
  # {:Organism=>"Mouse"}
174
174
  # {:Organism=>"Cat"}
175
175
  #
176
- # rubocop: disable ClassLength
177
176
  class ReadTable
178
177
  STATS = %i(records_in records_out)
179
178
 
@@ -321,7 +320,7 @@ module BioDSL
321
320
  return unless output
322
321
  input.each do |record|
323
322
  output << record
324
- @status[:records_in] += 1
323
+ @status[:records_in] += 1
325
324
  @status[:records_out] += 1
326
325
  end
327
326
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -102,10 +102,10 @@ module BioDSL
102
102
  entry = BioDSL::Seq.new_bp(record)
103
103
  entry.reverse!
104
104
 
105
- @status[:sequences_in] += 1
105
+ @status[:sequences_in] += 1
106
106
  @status[:sequences_out] += 1
107
- @status[:residues_in] += entry.length
108
- @status[:residues_out] += entry.length
107
+ @status[:residues_in] += entry.length
108
+ @status[:residues_out] += entry.length
109
109
 
110
110
  record.merge! entry.to_bp
111
111
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -231,7 +231,7 @@ module BioDSL
231
231
  def defaults
232
232
  @max_mis = @options[:max_mismatches] || 2
233
233
  @max_ins = @options[:max_insertions] || 1
234
- @max_del = @options[:max_deletions] || 1
234
+ @max_del = @options[:max_deletions] || 1
235
235
  end
236
236
 
237
237
  # Parse FASTA file with one gapped template sequence if specified.
@@ -312,7 +312,7 @@ module BioDSL
312
312
  entry = BioDSL::Seq.new_bp(record)
313
313
 
314
314
  @status[:sequences_in] += 1
315
- @status[:residues_in] += entry.length
315
+ @status[:residues_in] += entry.length
316
316
 
317
317
  setup_slice(entry) unless @slice
318
318
 
@@ -321,7 +321,7 @@ module BioDSL
321
321
  record.merge! entry.to_bp
322
322
 
323
323
  @status[:sequences_out] += 1
324
- @status[:residues_out] += entry.length
324
+ @status[:residues_out] += entry.length
325
325
  end
326
326
 
327
327
  # Using primers to locate slice positions in entry.
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -138,12 +138,12 @@ module BioDSL
138
138
  entry = BioDSL::Seq.new_bp(record)
139
139
 
140
140
  @status[:sequences_in] += 1
141
- @status[:residues_in] += entry.length
141
+ @status[:residues_in] += entry.length
142
142
 
143
143
  entry = entry[@options[:slice]]
144
144
 
145
145
  @status[:sequences_out] += 1
146
- @status[:residues_out] += entry.length
146
+ @status[:residues_out] += entry.length
147
147
 
148
148
  record.merge! entry.to_bp
149
149
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -146,12 +146,11 @@ module BioDSL
146
146
  # @param output [Enumerator::Yielder] Output stream.
147
147
  # @param record [Hash] BioDSL record.
148
148
  #
149
- # rubocop: disable Metrics/AbcSize
150
149
  def split_pair_seq(output, record)
151
150
  entry = BioDSL::Seq.new_bp(record)
152
151
 
153
152
  @status[:sequences_in] += 1
154
- @status[:residues_in] += entry.length
153
+ @status[:residues_in] += entry.length
155
154
 
156
155
  pos = get_split_pos(record, entry)
157
156
 
@@ -161,13 +160,13 @@ module BioDSL
161
160
  output << entry2.to_bp
162
161
 
163
162
  @status[:sequences_out] += 2
164
- @status[:residues_out] += entry1.length + entry2.length
165
- @status[:records_out] += 2
163
+ @status[:residues_out] += entry1.length + entry2.length
164
+ @status[:records_out] += 2
166
165
  end
167
166
 
168
167
  # Given a record locate the sequence split position.
169
168
  #
170
- # @param record [Hash] BioDSL record.
169
+ # @param record [Hash] BioDSL record.
171
170
  # @param entry [BioDSL::Seq] Sequence entry.
172
171
  #
173
172
  # @return [Integer] Sequence split position.
@@ -210,7 +209,7 @@ module BioDSL
210
209
  def fix_seq_names(entry1, entry2)
211
210
  if entry1.seq_name =~ /^[^ ]+ \d:/
212
211
  entry2.seq_name.sub!(/ \d:/, ' 2:')
213
- elsif entry1.seq_name =~ /^.+\/\d$/
212
+ elsif entry1.seq_name =~ %r{^.+\/\d$}
214
213
  entry2.seq_name[-1] = '2'
215
214
  else
216
215
  fail "Could not match sequence name: #{entry1.seq_name}"
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # @return [SplitValues] Class instance.
84
84
  def initialize(options)
85
- @options = options
85
+ @options = options
86
86
 
87
87
  check_options
88
88
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -131,10 +131,10 @@ module BioDSL
131
131
  # @return [TrimPrimer] Class instance.
132
132
  def initialize(options)
133
133
  @options = options
134
- @options[:overlap_min] ||= 1
135
- @options[:mismatch_percent] ||= 0
134
+ @options[:overlap_min] ||= 1
135
+ @options[:mismatch_percent] ||= 0
136
136
  @options[:insertion_percent] ||= 0
137
- @options[:deletion_percent] ||= 0
137
+ @options[:deletion_percent] ||= 0
138
138
  @pattern = pattern
139
139
  @hit = false
140
140
 
@@ -153,6 +153,7 @@ module BioDSL
153
153
 
154
154
  if record[:SEQ] && record[:SEQ].length > 0
155
155
  @status[:sequences_in] += 1
156
+ @status[:sequences_out] += 1
156
157
 
157
158
  case @options[:direction]
158
159
  when :forward then trim_forward(record)
@@ -198,7 +199,7 @@ module BioDSL
198
199
  def trim_forward(record)
199
200
  entry = BioDSL::Seq.new_bp(record)
200
201
 
201
- @status[:residues_in] += entry.length
202
+ @status[:residues_in] += entry.length
202
203
 
203
204
  while @pattern.length >= @options[:overlap_min]
204
205
  if (match = match_forward(entry))
@@ -235,6 +236,8 @@ module BioDSL
235
236
  def merge_forward(record, entry, match)
236
237
  entry = entry[match.pos + match.length..-1]
237
238
 
239
+ @status[:residues_out] += entry.length
240
+
238
241
  record.merge!(entry.to_bp)
239
242
  record[:TRIM_PRIMER_DIR] = 'FORWARD'
240
243
  record[:TRIM_PRIMER_POS] = match.pos
@@ -248,7 +251,7 @@ module BioDSL
248
251
  def trim_reverse(record)
249
252
  entry = BioDSL::Seq.new_bp(record)
250
253
 
251
- @status[:residues_in] += entry.length
254
+ @status[:residues_in] += entry.length
252
255
 
253
256
  while @pattern.length >= @options[:overlap_min]
254
257
  if (match = match_reverse(entry))
@@ -288,6 +291,8 @@ module BioDSL
288
291
  def merge_reverse(record, entry, match)
289
292
  entry = entry[0...match.pos]
290
293
 
294
+ @status[:residues_out] += entry.length
295
+
291
296
  record.merge!(entry.to_bp)
292
297
  record[:TRIM_PRIMER_DIR] = 'REVERSE'
293
298
  record[:TRIM_PRIMER_POS] = match.pos
@@ -302,9 +307,9 @@ module BioDSL
302
307
  #
303
308
  # @return [Hash] Match options hash.
304
309
  def match_options(length)
305
- mis = (length * @options[:mismatch_percent] * 0.01).round
310
+ mis = (length * @options[:mismatch_percent] * 0.01).round
306
311
  ins = (length * @options[:insertion_percent] * 0.01).round
307
- del = (length * @options[:deletion_percent] * 0.01).round
312
+ del = (length * @options[:deletion_percent] * 0.01).round
308
313
 
309
314
  {max_mismatches: mis,
310
315
  max_insertions: ins,
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -164,8 +164,8 @@ module BioDSL
164
164
  # Set default options.
165
165
  def defaults
166
166
  @options[:quality_min] ||= 20
167
- @options[:mode] ||= :both
168
- @options[:length_min] ||= 3
167
+ @options[:mode] ||= :both
168
+ @options[:length_min] ||= 3
169
169
  end
170
170
 
171
171
  # Trim sequence in a given record with sequence info.
@@ -175,7 +175,7 @@ module BioDSL
175
175
  entry = BioDSL::Seq.new_bp(record)
176
176
 
177
177
  @status[:sequences_in] += 1
178
- @status[:residues_in] += entry.length
178
+ @status[:residues_in] += entry.length
179
179
 
180
180
  case @mode
181
181
  when :both then entry.quality_trim!(@min, @len)
@@ -184,7 +184,7 @@ module BioDSL
184
184
  end
185
185
 
186
186
  @status[:sequences_out] += 1
187
- @status[:residues_out] += entry.length
187
+ @status[:residues_out] += entry.length
188
188
 
189
189
  record.merge! entry.to_bp
190
190
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -71,8 +71,8 @@ module BioDSL
71
71
  @options = options
72
72
  aux_exist('usearch')
73
73
  check_options
74
- @options[:cpus] ||= 1
75
- @options[:strand] ||= 'plus' # This option cant be changed in usearch7.0
74
+ @options[:cpus] ||= 1
75
+ @options[:strand] ||= 'plus' # This option cant be changed in usearch7.0
76
76
  end
77
77
 
78
78
  # Return command lambda for uchime_ref.
@@ -115,7 +115,7 @@ module BioDSL
115
115
 
116
116
  if record[:SEQ]
117
117
  @status[:sequences_in] += 1
118
- @status[:residues_in] += record[:SEQ].length
118
+ @status[:residues_in] += record[:SEQ].length
119
119
  seq_name = record[:SEQ_NAME] || i.to_s
120
120
 
121
121
  entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
@@ -161,8 +161,8 @@ module BioDSL
161
161
 
162
162
  output << record
163
163
  @status[:sequences_out] += 1
164
- @status[:residues_out] += entry.length
165
- @status[:records_out] += 1
164
+ @status[:residues_out] += entry.length
165
+ @status[:records_out] += 1
166
166
  end
167
167
  end
168
168
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -232,9 +232,9 @@ module BioDSL
232
232
  record.merge!(entry.to_bp)
233
233
 
234
234
  output << record
235
- @status[:records_out] += 1
235
+ @status[:records_out] += 1
236
236
  @status[:sequences_out] += 1
237
- @status[:residues_out] += entry.length
237
+ @status[:residues_out] += entry.length
238
238
  end
239
239
  end
240
240
  end
@@ -273,9 +273,9 @@ module BioDSL
273
273
 
274
274
  if (r = results[record[:SEQ_NAME]])
275
275
  output << record.merge(r)
276
- @status[:records_out] += 1
276
+ @status[:records_out] += 1
277
277
  @status[:sequences_out] += 1
278
- @status[:residues_out] += record[:SEQ].length
278
+ @status[:residues_out] += record[:SEQ].length
279
279
  else
280
280
  fail BioDSL::UsearchError, 'Sequence name: ' \
281
281
  "#{record[:SEQ_NAME]} not found in uclust results"