BioDSL 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/BioDSL.gemspec +1 -1
  4. data/Gemfile +6 -0
  5. data/README.md +289 -155
  6. data/Rakefile +18 -16
  7. data/lib/BioDSL.rb +1 -1
  8. data/lib/BioDSL/cary.rb +78 -53
  9. data/lib/BioDSL/command.rb +2 -2
  10. data/lib/BioDSL/commands.rb +1 -1
  11. data/lib/BioDSL/commands/add_key.rb +1 -1
  12. data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
  13. data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
  14. data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
  15. data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
  16. data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
  17. data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
  18. data/lib/BioDSL/commands/classify_seq.rb +8 -8
  19. data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
  20. data/lib/BioDSL/commands/clip_primer.rb +7 -7
  21. data/lib/BioDSL/commands/cluster_otus.rb +5 -5
  22. data/lib/BioDSL/commands/collapse_otus.rb +2 -2
  23. data/lib/BioDSL/commands/collect_otus.rb +2 -2
  24. data/lib/BioDSL/commands/complement_seq.rb +4 -4
  25. data/lib/BioDSL/commands/count.rb +1 -1
  26. data/lib/BioDSL/commands/count_values.rb +2 -2
  27. data/lib/BioDSL/commands/degap_seq.rb +6 -7
  28. data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
  29. data/lib/BioDSL/commands/dump.rb +2 -2
  30. data/lib/BioDSL/commands/filter_rrna.rb +4 -4
  31. data/lib/BioDSL/commands/genecall.rb +7 -7
  32. data/lib/BioDSL/commands/grab.rb +1 -1
  33. data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
  34. data/lib/BioDSL/commands/mask_seq.rb +4 -4
  35. data/lib/BioDSL/commands/mean_scores.rb +2 -2
  36. data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
  37. data/lib/BioDSL/commands/merge_table.rb +1 -1
  38. data/lib/BioDSL/commands/merge_values.rb +1 -1
  39. data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
  40. data/lib/BioDSL/commands/plot_histogram.rb +4 -4
  41. data/lib/BioDSL/commands/plot_matches.rb +5 -5
  42. data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
  43. data/lib/BioDSL/commands/plot_scores.rb +7 -7
  44. data/lib/BioDSL/commands/random.rb +1 -1
  45. data/lib/BioDSL/commands/read_fasta.rb +9 -9
  46. data/lib/BioDSL/commands/read_fastq.rb +16 -16
  47. data/lib/BioDSL/commands/read_table.rb +2 -3
  48. data/lib/BioDSL/commands/reverse_seq.rb +4 -4
  49. data/lib/BioDSL/commands/slice_align.rb +4 -4
  50. data/lib/BioDSL/commands/slice_seq.rb +3 -3
  51. data/lib/BioDSL/commands/sort.rb +1 -1
  52. data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
  53. data/lib/BioDSL/commands/split_values.rb +2 -2
  54. data/lib/BioDSL/commands/trim_primer.rb +13 -8
  55. data/lib/BioDSL/commands/trim_seq.rb +5 -5
  56. data/lib/BioDSL/commands/uchime_ref.rb +6 -6
  57. data/lib/BioDSL/commands/uclust.rb +5 -5
  58. data/lib/BioDSL/commands/unique_values.rb +1 -1
  59. data/lib/BioDSL/commands/usearch_global.rb +2 -2
  60. data/lib/BioDSL/commands/usearch_local.rb +2 -2
  61. data/lib/BioDSL/commands/write_fasta.rb +7 -9
  62. data/lib/BioDSL/commands/write_fastq.rb +4 -4
  63. data/lib/BioDSL/commands/write_table.rb +3 -3
  64. data/lib/BioDSL/commands/write_tree.rb +2 -3
  65. data/lib/BioDSL/config.rb +2 -2
  66. data/lib/BioDSL/csv.rb +8 -10
  67. data/lib/BioDSL/debug.rb +1 -1
  68. data/lib/BioDSL/fasta.rb +54 -40
  69. data/lib/BioDSL/fastq.rb +35 -32
  70. data/lib/BioDSL/filesys.rb +56 -47
  71. data/lib/BioDSL/fork.rb +1 -1
  72. data/lib/BioDSL/hamming.rb +1 -1
  73. data/lib/BioDSL/helpers.rb +1 -1
  74. data/lib/BioDSL/helpers/aux_helper.rb +1 -1
  75. data/lib/BioDSL/helpers/email_helper.rb +1 -1
  76. data/lib/BioDSL/helpers/history_helper.rb +1 -1
  77. data/lib/BioDSL/helpers/log_helper.rb +1 -1
  78. data/lib/BioDSL/helpers/options_helper.rb +1 -1
  79. data/lib/BioDSL/helpers/status_helper.rb +1 -1
  80. data/lib/BioDSL/html_report.rb +1 -1
  81. data/lib/BioDSL/math.rb +1 -1
  82. data/lib/BioDSL/mummer.rb +1 -1
  83. data/lib/BioDSL/pipeline.rb +1 -1
  84. data/lib/BioDSL/seq.rb +240 -231
  85. data/lib/BioDSL/seq/ambiguity.rb +1 -1
  86. data/lib/BioDSL/seq/assemble.rb +1 -1
  87. data/lib/BioDSL/seq/backtrack.rb +93 -76
  88. data/lib/BioDSL/seq/digest.rb +1 -1
  89. data/lib/BioDSL/seq/dynamic.rb +43 -55
  90. data/lib/BioDSL/seq/homopolymer.rb +34 -36
  91. data/lib/BioDSL/seq/kmer.rb +67 -50
  92. data/lib/BioDSL/seq/levenshtein.rb +35 -40
  93. data/lib/BioDSL/seq/translate.rb +64 -55
  94. data/lib/BioDSL/seq/trim.rb +60 -50
  95. data/lib/BioDSL/serializer.rb +1 -1
  96. data/lib/BioDSL/stream.rb +1 -1
  97. data/lib/BioDSL/taxonomy.rb +1 -1
  98. data/lib/BioDSL/test.rb +1 -1
  99. data/lib/BioDSL/tmp_dir.rb +1 -1
  100. data/lib/BioDSL/usearch.rb +1 -1
  101. data/lib/BioDSL/verbose.rb +1 -1
  102. data/lib/BioDSL/version.rb +2 -2
  103. data/test/BioDSL/commands/test_add_key.rb +1 -1
  104. data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
  105. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
  106. data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
  107. data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
  108. data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
  109. data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
  110. data/test/BioDSL/commands/test_classify_seq.rb +1 -1
  111. data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
  112. data/test/BioDSL/commands/test_clip_primer.rb +1 -1
  113. data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
  114. data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
  115. data/test/BioDSL/commands/test_collect_otus.rb +1 -1
  116. data/test/BioDSL/commands/test_complement_seq.rb +1 -1
  117. data/test/BioDSL/commands/test_count.rb +1 -1
  118. data/test/BioDSL/commands/test_count_values.rb +1 -1
  119. data/test/BioDSL/commands/test_degap_seq.rb +1 -1
  120. data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
  121. data/test/BioDSL/commands/test_dump.rb +1 -1
  122. data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
  123. data/test/BioDSL/commands/test_genecall.rb +1 -1
  124. data/test/BioDSL/commands/test_grab.rb +1 -1
  125. data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
  126. data/test/BioDSL/commands/test_mask_seq.rb +1 -1
  127. data/test/BioDSL/commands/test_mean_scores.rb +1 -1
  128. data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
  129. data/test/BioDSL/commands/test_merge_table.rb +1 -1
  130. data/test/BioDSL/commands/test_merge_values.rb +1 -1
  131. data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
  132. data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
  133. data/test/BioDSL/commands/test_plot_matches.rb +1 -1
  134. data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
  135. data/test/BioDSL/commands/test_plot_scores.rb +1 -1
  136. data/test/BioDSL/commands/test_random.rb +1 -1
  137. data/test/BioDSL/commands/test_read_fasta.rb +1 -1
  138. data/test/BioDSL/commands/test_read_fastq.rb +1 -1
  139. data/test/BioDSL/commands/test_read_table.rb +1 -1
  140. data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
  141. data/test/BioDSL/commands/test_slice_align.rb +1 -1
  142. data/test/BioDSL/commands/test_slice_seq.rb +1 -1
  143. data/test/BioDSL/commands/test_sort.rb +1 -1
  144. data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
  145. data/test/BioDSL/commands/test_split_values.rb +1 -1
  146. data/test/BioDSL/commands/test_trim_primer.rb +1 -1
  147. data/test/BioDSL/commands/test_trim_seq.rb +1 -1
  148. data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
  149. data/test/BioDSL/commands/test_uclust.rb +1 -1
  150. data/test/BioDSL/commands/test_unique_values.rb +1 -1
  151. data/test/BioDSL/commands/test_usearch_global.rb +1 -1
  152. data/test/BioDSL/commands/test_usearch_local.rb +1 -1
  153. data/test/BioDSL/commands/test_write_fasta.rb +1 -1
  154. data/test/BioDSL/commands/test_write_fastq.rb +1 -1
  155. data/test/BioDSL/commands/test_write_table.rb +1 -1
  156. data/test/BioDSL/commands/test_write_tree.rb +1 -1
  157. data/test/BioDSL/helpers/test_options_helper.rb +3 -3
  158. data/test/BioDSL/seq/test_assemble.rb +58 -56
  159. data/test/BioDSL/seq/test_backtrack.rb +83 -81
  160. data/test/BioDSL/seq/test_digest.rb +47 -45
  161. data/test/BioDSL/seq/test_dynamic.rb +66 -64
  162. data/test/BioDSL/seq/test_homopolymer.rb +35 -33
  163. data/test/BioDSL/seq/test_kmer.rb +29 -28
  164. data/test/BioDSL/seq/test_translate.rb +44 -42
  165. data/test/BioDSL/seq/test_trim.rb +59 -57
  166. data/test/BioDSL/test_cary.rb +1 -1
  167. data/test/BioDSL/test_command.rb +2 -2
  168. data/test/BioDSL/test_csv.rb +34 -31
  169. data/test/BioDSL/test_debug.rb +31 -31
  170. data/test/BioDSL/test_fasta.rb +30 -29
  171. data/test/BioDSL/test_fastq.rb +27 -26
  172. data/test/BioDSL/test_filesys.rb +28 -27
  173. data/test/BioDSL/test_fork.rb +29 -28
  174. data/test/BioDSL/test_math.rb +31 -30
  175. data/test/BioDSL/test_mummer.rb +1 -1
  176. data/test/BioDSL/test_pipeline.rb +1 -1
  177. data/test/BioDSL/test_seq.rb +42 -41
  178. data/test/BioDSL/test_serializer.rb +35 -33
  179. data/test/BioDSL/test_stream.rb +28 -27
  180. data/test/BioDSL/test_taxonomy.rb +38 -37
  181. data/test/BioDSL/test_test.rb +32 -31
  182. data/test/BioDSL/test_tmp_dir.rb +1 -1
  183. data/test/BioDSL/test_usearch.rb +28 -27
  184. data/test/BioDSL/test_verbose.rb +32 -31
  185. data/test/helper.rb +34 -31
  186. metadata +3 -2
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of BioDSL (www.github.com/maasha/BioDSL). #
24
+ # This software is part of BioDSL (http://maasha.github.io/BioDSL). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -143,7 +143,7 @@ module BioDSL
143
143
  case
144
144
  when @options[:first] && @pair then read_first_pair(output)
145
145
  when @options[:first] then read_first_single(output)
146
- when @options[:last] && @pair then read_last_pair(output)
146
+ when @options[:last] && @pair then read_last_pair(output)
147
147
  when @options[:last] then read_last_single(output)
148
148
  when @pair then read_all_pair(output)
149
149
  else
@@ -176,12 +176,12 @@ module BioDSL
176
176
  return unless input
177
177
 
178
178
  input.each do |record|
179
- @status[:records_in] += 1
179
+ @status[:records_in] += 1
180
180
  @status[:records_out] += 1
181
181
 
182
182
  if (seq = record[:SEQ])
183
183
  @status[:sequences_in] += 1
184
- @status[:residues_in] += seq.length
184
+ @status[:residues_in] += seq.length
185
185
  end
186
186
 
187
187
  output << record
@@ -197,10 +197,10 @@ module BioDSL
197
197
  ios.each do |entry|
198
198
  check_entry(entry)
199
199
  output << entry.to_bp
200
- @status[:records_out] += 1
200
+ @status[:records_out] += 1
201
201
  @status[:sequences_out] += 1
202
- @status[:residues_out] += entry.length
203
- return if @status[:sequences_out] >= @options[:first]
202
+ @status[:residues_out] += entry.length
203
+ break if @status[:sequences_out] >= @options[:first]
204
204
  end
205
205
  end
206
206
  end
@@ -220,10 +220,10 @@ module BioDSL
220
220
  reverse_complement(entry2) if @options[:reverse_complement]
221
221
  output << entry1.to_bp
222
222
  output << entry2.to_bp
223
- @status[:records_out] += 2
223
+ @status[:records_out] += 2
224
224
  @status[:sequences_out] += 2
225
- @status[:residues_out] += entry1.length + entry2.length
226
- return if @status[:sequences_out] >= @options[:first]
225
+ @status[:residues_out] += entry1.length + entry2.length
226
+ break if @status[:sequences_out] >= @options[:first]
227
227
  end
228
228
  end
229
229
  end
@@ -279,9 +279,9 @@ module BioDSL
279
279
  ios.each do |entry|
280
280
  check_entry(entry)
281
281
  output << entry.to_bp
282
- @status[:records_out] += 1
282
+ @status[:records_out] += 1
283
283
  @status[:sequences_out] += 1
284
- @status[:residues_out] += entry.length
284
+ @status[:residues_out] += entry.length
285
285
  end
286
286
  end
287
287
  end
@@ -299,9 +299,9 @@ module BioDSL
299
299
  reverse_complement(entry2) if @options[:reverse_complement]
300
300
  output << entry1.to_bp
301
301
  output << entry2.to_bp
302
- @status[:records_out] += 2
302
+ @status[:records_out] += 2
303
303
  @status[:sequences_out] += 2
304
- @status[:residues_out] += entry1.length + entry2.length
304
+ @status[:residues_out] += entry1.length + entry2.length
305
305
  end
306
306
  end
307
307
  end
@@ -405,9 +405,9 @@ module BioDSL
405
405
  @buffer.each do |entry|
406
406
  output << entry.to_bp
407
407
 
408
- @status[:records_out] += 1
408
+ @status[:records_out] += 1
409
409
  @status[:sequences_out] += 1
410
- @status[:residues_out] += entry.length
410
+ @status[:residues_out] += entry.length
411
411
  end
412
412
  end
413
413
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -173,7 +173,6 @@ module BioDSL
173
173
  # {:Organism=>"Mouse"}
174
174
  # {:Organism=>"Cat"}
175
175
  #
176
- # rubocop: disable ClassLength
177
176
  class ReadTable
178
177
  STATS = %i(records_in records_out)
179
178
 
@@ -321,7 +320,7 @@ module BioDSL
321
320
  return unless output
322
321
  input.each do |record|
323
322
  output << record
324
- @status[:records_in] += 1
323
+ @status[:records_in] += 1
325
324
  @status[:records_out] += 1
326
325
  end
327
326
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -102,10 +102,10 @@ module BioDSL
102
102
  entry = BioDSL::Seq.new_bp(record)
103
103
  entry.reverse!
104
104
 
105
- @status[:sequences_in] += 1
105
+ @status[:sequences_in] += 1
106
106
  @status[:sequences_out] += 1
107
- @status[:residues_in] += entry.length
108
- @status[:residues_out] += entry.length
107
+ @status[:residues_in] += entry.length
108
+ @status[:residues_out] += entry.length
109
109
 
110
110
  record.merge! entry.to_bp
111
111
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -231,7 +231,7 @@ module BioDSL
231
231
  def defaults
232
232
  @max_mis = @options[:max_mismatches] || 2
233
233
  @max_ins = @options[:max_insertions] || 1
234
- @max_del = @options[:max_deletions] || 1
234
+ @max_del = @options[:max_deletions] || 1
235
235
  end
236
236
 
237
237
  # Parse FASTA file with one gapped template sequence if specified.
@@ -312,7 +312,7 @@ module BioDSL
312
312
  entry = BioDSL::Seq.new_bp(record)
313
313
 
314
314
  @status[:sequences_in] += 1
315
- @status[:residues_in] += entry.length
315
+ @status[:residues_in] += entry.length
316
316
 
317
317
  setup_slice(entry) unless @slice
318
318
 
@@ -321,7 +321,7 @@ module BioDSL
321
321
  record.merge! entry.to_bp
322
322
 
323
323
  @status[:sequences_out] += 1
324
- @status[:residues_out] += entry.length
324
+ @status[:residues_out] += entry.length
325
325
  end
326
326
 
327
327
  # Using primers to locate slice positions in entry.
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -138,12 +138,12 @@ module BioDSL
138
138
  entry = BioDSL::Seq.new_bp(record)
139
139
 
140
140
  @status[:sequences_in] += 1
141
- @status[:residues_in] += entry.length
141
+ @status[:residues_in] += entry.length
142
142
 
143
143
  entry = entry[@options[:slice]]
144
144
 
145
145
  @status[:sequences_out] += 1
146
- @status[:residues_out] += entry.length
146
+ @status[:residues_out] += entry.length
147
147
 
148
148
  record.merge! entry.to_bp
149
149
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -146,12 +146,11 @@ module BioDSL
146
146
  # @param output [Enumerator::Yielder] Output stream.
147
147
  # @param record [Hash] BioDSL record.
148
148
  #
149
- # rubocop: disable Metrics/AbcSize
150
149
  def split_pair_seq(output, record)
151
150
  entry = BioDSL::Seq.new_bp(record)
152
151
 
153
152
  @status[:sequences_in] += 1
154
- @status[:residues_in] += entry.length
153
+ @status[:residues_in] += entry.length
155
154
 
156
155
  pos = get_split_pos(record, entry)
157
156
 
@@ -161,13 +160,13 @@ module BioDSL
161
160
  output << entry2.to_bp
162
161
 
163
162
  @status[:sequences_out] += 2
164
- @status[:residues_out] += entry1.length + entry2.length
165
- @status[:records_out] += 2
163
+ @status[:residues_out] += entry1.length + entry2.length
164
+ @status[:records_out] += 2
166
165
  end
167
166
 
168
167
  # Given a record locate the sequence split position.
169
168
  #
170
- # @param record [Hash] BioDSL record.
169
+ # @param record [Hash] BioDSL record.
171
170
  # @param entry [BioDSL::Seq] Sequence entry.
172
171
  #
173
172
  # @return [Integer] Sequence split position.
@@ -210,7 +209,7 @@ module BioDSL
210
209
  def fix_seq_names(entry1, entry2)
211
210
  if entry1.seq_name =~ /^[^ ]+ \d:/
212
211
  entry2.seq_name.sub!(/ \d:/, ' 2:')
213
- elsif entry1.seq_name =~ /^.+\/\d$/
212
+ elsif entry1.seq_name =~ %r{^.+\/\d$}
214
213
  entry2.seq_name[-1] = '2'
215
214
  else
216
215
  fail "Could not match sequence name: #{entry1.seq_name}"
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -82,7 +82,7 @@ module BioDSL
82
82
  #
83
83
  # @return [SplitValues] Class instance.
84
84
  def initialize(options)
85
- @options = options
85
+ @options = options
86
86
 
87
87
  check_options
88
88
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -131,10 +131,10 @@ module BioDSL
131
131
  # @return [TrimPrimer] Class instance.
132
132
  def initialize(options)
133
133
  @options = options
134
- @options[:overlap_min] ||= 1
135
- @options[:mismatch_percent] ||= 0
134
+ @options[:overlap_min] ||= 1
135
+ @options[:mismatch_percent] ||= 0
136
136
  @options[:insertion_percent] ||= 0
137
- @options[:deletion_percent] ||= 0
137
+ @options[:deletion_percent] ||= 0
138
138
  @pattern = pattern
139
139
  @hit = false
140
140
 
@@ -153,6 +153,7 @@ module BioDSL
153
153
 
154
154
  if record[:SEQ] && record[:SEQ].length > 0
155
155
  @status[:sequences_in] += 1
156
+ @status[:sequences_out] += 1
156
157
 
157
158
  case @options[:direction]
158
159
  when :forward then trim_forward(record)
@@ -198,7 +199,7 @@ module BioDSL
198
199
  def trim_forward(record)
199
200
  entry = BioDSL::Seq.new_bp(record)
200
201
 
201
- @status[:residues_in] += entry.length
202
+ @status[:residues_in] += entry.length
202
203
 
203
204
  while @pattern.length >= @options[:overlap_min]
204
205
  if (match = match_forward(entry))
@@ -235,6 +236,8 @@ module BioDSL
235
236
  def merge_forward(record, entry, match)
236
237
  entry = entry[match.pos + match.length..-1]
237
238
 
239
+ @status[:residues_out] += entry.length
240
+
238
241
  record.merge!(entry.to_bp)
239
242
  record[:TRIM_PRIMER_DIR] = 'FORWARD'
240
243
  record[:TRIM_PRIMER_POS] = match.pos
@@ -248,7 +251,7 @@ module BioDSL
248
251
  def trim_reverse(record)
249
252
  entry = BioDSL::Seq.new_bp(record)
250
253
 
251
- @status[:residues_in] += entry.length
254
+ @status[:residues_in] += entry.length
252
255
 
253
256
  while @pattern.length >= @options[:overlap_min]
254
257
  if (match = match_reverse(entry))
@@ -288,6 +291,8 @@ module BioDSL
288
291
  def merge_reverse(record, entry, match)
289
292
  entry = entry[0...match.pos]
290
293
 
294
+ @status[:residues_out] += entry.length
295
+
291
296
  record.merge!(entry.to_bp)
292
297
  record[:TRIM_PRIMER_DIR] = 'REVERSE'
293
298
  record[:TRIM_PRIMER_POS] = match.pos
@@ -302,9 +307,9 @@ module BioDSL
302
307
  #
303
308
  # @return [Hash] Match options hash.
304
309
  def match_options(length)
305
- mis = (length * @options[:mismatch_percent] * 0.01).round
310
+ mis = (length * @options[:mismatch_percent] * 0.01).round
306
311
  ins = (length * @options[:insertion_percent] * 0.01).round
307
- del = (length * @options[:deletion_percent] * 0.01).round
312
+ del = (length * @options[:deletion_percent] * 0.01).round
308
313
 
309
314
  {max_mismatches: mis,
310
315
  max_insertions: ins,
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -164,8 +164,8 @@ module BioDSL
164
164
  # Set default options.
165
165
  def defaults
166
166
  @options[:quality_min] ||= 20
167
- @options[:mode] ||= :both
168
- @options[:length_min] ||= 3
167
+ @options[:mode] ||= :both
168
+ @options[:length_min] ||= 3
169
169
  end
170
170
 
171
171
  # Trim sequence in a given record with sequence info.
@@ -175,7 +175,7 @@ module BioDSL
175
175
  entry = BioDSL::Seq.new_bp(record)
176
176
 
177
177
  @status[:sequences_in] += 1
178
- @status[:residues_in] += entry.length
178
+ @status[:residues_in] += entry.length
179
179
 
180
180
  case @mode
181
181
  when :both then entry.quality_trim!(@min, @len)
@@ -184,7 +184,7 @@ module BioDSL
184
184
  end
185
185
 
186
186
  @status[:sequences_out] += 1
187
- @status[:residues_out] += entry.length
187
+ @status[:residues_out] += entry.length
188
188
 
189
189
  record.merge! entry.to_bp
190
190
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -71,8 +71,8 @@ module BioDSL
71
71
  @options = options
72
72
  aux_exist('usearch')
73
73
  check_options
74
- @options[:cpus] ||= 1
75
- @options[:strand] ||= 'plus' # This option cant be changed in usearch7.0
74
+ @options[:cpus] ||= 1
75
+ @options[:strand] ||= 'plus' # This option cant be changed in usearch7.0
76
76
  end
77
77
 
78
78
  # Return command lambda for uchime_ref.
@@ -115,7 +115,7 @@ module BioDSL
115
115
 
116
116
  if record[:SEQ]
117
117
  @status[:sequences_in] += 1
118
- @status[:residues_in] += record[:SEQ].length
118
+ @status[:residues_in] += record[:SEQ].length
119
119
  seq_name = record[:SEQ_NAME] || i.to_s
120
120
 
121
121
  entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
@@ -161,8 +161,8 @@ module BioDSL
161
161
 
162
162
  output << record
163
163
  @status[:sequences_out] += 1
164
- @status[:residues_out] += entry.length
165
- @status[:records_out] += 1
164
+ @status[:residues_out] += entry.length
165
+ @status[:records_out] += 1
166
166
  end
167
167
  end
168
168
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -232,9 +232,9 @@ module BioDSL
232
232
  record.merge!(entry.to_bp)
233
233
 
234
234
  output << record
235
- @status[:records_out] += 1
235
+ @status[:records_out] += 1
236
236
  @status[:sequences_out] += 1
237
- @status[:residues_out] += entry.length
237
+ @status[:residues_out] += entry.length
238
238
  end
239
239
  end
240
240
  end
@@ -273,9 +273,9 @@ module BioDSL
273
273
 
274
274
  if (r = results[record[:SEQ_NAME]])
275
275
  output << record.merge(r)
276
- @status[:records_out] += 1
276
+ @status[:records_out] += 1
277
277
  @status[:sequences_out] += 1
278
- @status[:residues_out] += record[:SEQ].length
278
+ @status[:residues_out] += record[:SEQ].length
279
279
  else
280
280
  fail BioDSL::UsearchError, 'Sequence name: ' \
281
281
  "#{record[:SEQ_NAME]} not found in uclust results"