BioDSL 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/BioDSL.gemspec +1 -1
  4. data/Gemfile +6 -0
  5. data/README.md +289 -155
  6. data/Rakefile +18 -16
  7. data/lib/BioDSL.rb +1 -1
  8. data/lib/BioDSL/cary.rb +78 -53
  9. data/lib/BioDSL/command.rb +2 -2
  10. data/lib/BioDSL/commands.rb +1 -1
  11. data/lib/BioDSL/commands/add_key.rb +1 -1
  12. data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
  13. data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
  14. data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
  15. data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
  16. data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
  17. data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
  18. data/lib/BioDSL/commands/classify_seq.rb +8 -8
  19. data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
  20. data/lib/BioDSL/commands/clip_primer.rb +7 -7
  21. data/lib/BioDSL/commands/cluster_otus.rb +5 -5
  22. data/lib/BioDSL/commands/collapse_otus.rb +2 -2
  23. data/lib/BioDSL/commands/collect_otus.rb +2 -2
  24. data/lib/BioDSL/commands/complement_seq.rb +4 -4
  25. data/lib/BioDSL/commands/count.rb +1 -1
  26. data/lib/BioDSL/commands/count_values.rb +2 -2
  27. data/lib/BioDSL/commands/degap_seq.rb +6 -7
  28. data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
  29. data/lib/BioDSL/commands/dump.rb +2 -2
  30. data/lib/BioDSL/commands/filter_rrna.rb +4 -4
  31. data/lib/BioDSL/commands/genecall.rb +7 -7
  32. data/lib/BioDSL/commands/grab.rb +1 -1
  33. data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
  34. data/lib/BioDSL/commands/mask_seq.rb +4 -4
  35. data/lib/BioDSL/commands/mean_scores.rb +2 -2
  36. data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
  37. data/lib/BioDSL/commands/merge_table.rb +1 -1
  38. data/lib/BioDSL/commands/merge_values.rb +1 -1
  39. data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
  40. data/lib/BioDSL/commands/plot_histogram.rb +4 -4
  41. data/lib/BioDSL/commands/plot_matches.rb +5 -5
  42. data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
  43. data/lib/BioDSL/commands/plot_scores.rb +7 -7
  44. data/lib/BioDSL/commands/random.rb +1 -1
  45. data/lib/BioDSL/commands/read_fasta.rb +9 -9
  46. data/lib/BioDSL/commands/read_fastq.rb +16 -16
  47. data/lib/BioDSL/commands/read_table.rb +2 -3
  48. data/lib/BioDSL/commands/reverse_seq.rb +4 -4
  49. data/lib/BioDSL/commands/slice_align.rb +4 -4
  50. data/lib/BioDSL/commands/slice_seq.rb +3 -3
  51. data/lib/BioDSL/commands/sort.rb +1 -1
  52. data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
  53. data/lib/BioDSL/commands/split_values.rb +2 -2
  54. data/lib/BioDSL/commands/trim_primer.rb +13 -8
  55. data/lib/BioDSL/commands/trim_seq.rb +5 -5
  56. data/lib/BioDSL/commands/uchime_ref.rb +6 -6
  57. data/lib/BioDSL/commands/uclust.rb +5 -5
  58. data/lib/BioDSL/commands/unique_values.rb +1 -1
  59. data/lib/BioDSL/commands/usearch_global.rb +2 -2
  60. data/lib/BioDSL/commands/usearch_local.rb +2 -2
  61. data/lib/BioDSL/commands/write_fasta.rb +7 -9
  62. data/lib/BioDSL/commands/write_fastq.rb +4 -4
  63. data/lib/BioDSL/commands/write_table.rb +3 -3
  64. data/lib/BioDSL/commands/write_tree.rb +2 -3
  65. data/lib/BioDSL/config.rb +2 -2
  66. data/lib/BioDSL/csv.rb +8 -10
  67. data/lib/BioDSL/debug.rb +1 -1
  68. data/lib/BioDSL/fasta.rb +54 -40
  69. data/lib/BioDSL/fastq.rb +35 -32
  70. data/lib/BioDSL/filesys.rb +56 -47
  71. data/lib/BioDSL/fork.rb +1 -1
  72. data/lib/BioDSL/hamming.rb +1 -1
  73. data/lib/BioDSL/helpers.rb +1 -1
  74. data/lib/BioDSL/helpers/aux_helper.rb +1 -1
  75. data/lib/BioDSL/helpers/email_helper.rb +1 -1
  76. data/lib/BioDSL/helpers/history_helper.rb +1 -1
  77. data/lib/BioDSL/helpers/log_helper.rb +1 -1
  78. data/lib/BioDSL/helpers/options_helper.rb +1 -1
  79. data/lib/BioDSL/helpers/status_helper.rb +1 -1
  80. data/lib/BioDSL/html_report.rb +1 -1
  81. data/lib/BioDSL/math.rb +1 -1
  82. data/lib/BioDSL/mummer.rb +1 -1
  83. data/lib/BioDSL/pipeline.rb +1 -1
  84. data/lib/BioDSL/seq.rb +240 -231
  85. data/lib/BioDSL/seq/ambiguity.rb +1 -1
  86. data/lib/BioDSL/seq/assemble.rb +1 -1
  87. data/lib/BioDSL/seq/backtrack.rb +93 -76
  88. data/lib/BioDSL/seq/digest.rb +1 -1
  89. data/lib/BioDSL/seq/dynamic.rb +43 -55
  90. data/lib/BioDSL/seq/homopolymer.rb +34 -36
  91. data/lib/BioDSL/seq/kmer.rb +67 -50
  92. data/lib/BioDSL/seq/levenshtein.rb +35 -40
  93. data/lib/BioDSL/seq/translate.rb +64 -55
  94. data/lib/BioDSL/seq/trim.rb +60 -50
  95. data/lib/BioDSL/serializer.rb +1 -1
  96. data/lib/BioDSL/stream.rb +1 -1
  97. data/lib/BioDSL/taxonomy.rb +1 -1
  98. data/lib/BioDSL/test.rb +1 -1
  99. data/lib/BioDSL/tmp_dir.rb +1 -1
  100. data/lib/BioDSL/usearch.rb +1 -1
  101. data/lib/BioDSL/verbose.rb +1 -1
  102. data/lib/BioDSL/version.rb +2 -2
  103. data/test/BioDSL/commands/test_add_key.rb +1 -1
  104. data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
  105. data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
  106. data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
  107. data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
  108. data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
  109. data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
  110. data/test/BioDSL/commands/test_classify_seq.rb +1 -1
  111. data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
  112. data/test/BioDSL/commands/test_clip_primer.rb +1 -1
  113. data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
  114. data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
  115. data/test/BioDSL/commands/test_collect_otus.rb +1 -1
  116. data/test/BioDSL/commands/test_complement_seq.rb +1 -1
  117. data/test/BioDSL/commands/test_count.rb +1 -1
  118. data/test/BioDSL/commands/test_count_values.rb +1 -1
  119. data/test/BioDSL/commands/test_degap_seq.rb +1 -1
  120. data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
  121. data/test/BioDSL/commands/test_dump.rb +1 -1
  122. data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
  123. data/test/BioDSL/commands/test_genecall.rb +1 -1
  124. data/test/BioDSL/commands/test_grab.rb +1 -1
  125. data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
  126. data/test/BioDSL/commands/test_mask_seq.rb +1 -1
  127. data/test/BioDSL/commands/test_mean_scores.rb +1 -1
  128. data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
  129. data/test/BioDSL/commands/test_merge_table.rb +1 -1
  130. data/test/BioDSL/commands/test_merge_values.rb +1 -1
  131. data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
  132. data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
  133. data/test/BioDSL/commands/test_plot_matches.rb +1 -1
  134. data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
  135. data/test/BioDSL/commands/test_plot_scores.rb +1 -1
  136. data/test/BioDSL/commands/test_random.rb +1 -1
  137. data/test/BioDSL/commands/test_read_fasta.rb +1 -1
  138. data/test/BioDSL/commands/test_read_fastq.rb +1 -1
  139. data/test/BioDSL/commands/test_read_table.rb +1 -1
  140. data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
  141. data/test/BioDSL/commands/test_slice_align.rb +1 -1
  142. data/test/BioDSL/commands/test_slice_seq.rb +1 -1
  143. data/test/BioDSL/commands/test_sort.rb +1 -1
  144. data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
  145. data/test/BioDSL/commands/test_split_values.rb +1 -1
  146. data/test/BioDSL/commands/test_trim_primer.rb +1 -1
  147. data/test/BioDSL/commands/test_trim_seq.rb +1 -1
  148. data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
  149. data/test/BioDSL/commands/test_uclust.rb +1 -1
  150. data/test/BioDSL/commands/test_unique_values.rb +1 -1
  151. data/test/BioDSL/commands/test_usearch_global.rb +1 -1
  152. data/test/BioDSL/commands/test_usearch_local.rb +1 -1
  153. data/test/BioDSL/commands/test_write_fasta.rb +1 -1
  154. data/test/BioDSL/commands/test_write_fastq.rb +1 -1
  155. data/test/BioDSL/commands/test_write_table.rb +1 -1
  156. data/test/BioDSL/commands/test_write_tree.rb +1 -1
  157. data/test/BioDSL/helpers/test_options_helper.rb +3 -3
  158. data/test/BioDSL/seq/test_assemble.rb +58 -56
  159. data/test/BioDSL/seq/test_backtrack.rb +83 -81
  160. data/test/BioDSL/seq/test_digest.rb +47 -45
  161. data/test/BioDSL/seq/test_dynamic.rb +66 -64
  162. data/test/BioDSL/seq/test_homopolymer.rb +35 -33
  163. data/test/BioDSL/seq/test_kmer.rb +29 -28
  164. data/test/BioDSL/seq/test_translate.rb +44 -42
  165. data/test/BioDSL/seq/test_trim.rb +59 -57
  166. data/test/BioDSL/test_cary.rb +1 -1
  167. data/test/BioDSL/test_command.rb +2 -2
  168. data/test/BioDSL/test_csv.rb +34 -31
  169. data/test/BioDSL/test_debug.rb +31 -31
  170. data/test/BioDSL/test_fasta.rb +30 -29
  171. data/test/BioDSL/test_fastq.rb +27 -26
  172. data/test/BioDSL/test_filesys.rb +28 -27
  173. data/test/BioDSL/test_fork.rb +29 -28
  174. data/test/BioDSL/test_math.rb +31 -30
  175. data/test/BioDSL/test_mummer.rb +1 -1
  176. data/test/BioDSL/test_pipeline.rb +1 -1
  177. data/test/BioDSL/test_seq.rb +42 -41
  178. data/test/BioDSL/test_serializer.rb +35 -33
  179. data/test/BioDSL/test_stream.rb +28 -27
  180. data/test/BioDSL/test_taxonomy.rb +38 -37
  181. data/test/BioDSL/test_test.rb +32 -31
  182. data/test/BioDSL/test_tmp_dir.rb +1 -1
  183. data/test/BioDSL/test_usearch.rb +28 -27
  184. data/test/BioDSL/test_verbose.rb +32 -31
  185. data/test/helper.rb +34 -31
  186. metadata +3 -2
@@ -21,13 +21,11 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
28
28
  module BioDSL
29
- # rubocop:disable ClassLength
30
-
31
29
  # == Assemble sequences the stream using IDBA_UD.
32
30
  #
33
31
  # +assemble_seq_idba+ is a wrapper around the prokaryotic metagenome
@@ -99,7 +97,7 @@ module BioDSL
99
97
  TmpDir.create('reads.fna', 'contig.fa') do |fa_in, fa_out, tmp_dir|
100
98
  process_input(input, output, fa_in)
101
99
  execute_idba(fa_in, tmp_dir)
102
- lengths = process_output(output, fa_out)
100
+ process_output(output, fa_out)
103
101
  end
104
102
 
105
103
  calc_n50(status)
@@ -123,7 +121,7 @@ module BioDSL
123
121
  def defaults
124
122
  @options[:kmer_min] ||= 24
125
123
  @options[:kmer_max] ||= 48
126
- @options[:cpus] ||= 1
124
+ @options[:cpus] ||= 1
127
125
  end
128
126
 
129
127
  # Read all records from input and emit non-sequence records to the output
@@ -141,7 +139,7 @@ module BioDSL
141
139
  entry = BioDSL::Seq.new_bp(record)
142
140
 
143
141
  @status[:sequences_in] += 1
144
- @status[:residues_in] += entry.length
142
+ @status[:residues_in] += entry.length
145
143
 
146
144
  fasta_io.puts entry.to_fasta
147
145
  else
@@ -193,9 +191,9 @@ module BioDSL
193
191
  BioDSL::Fasta.open(fa_out, 'r') do |ios|
194
192
  ios.each do |entry|
195
193
  output << entry.to_bp
196
- @status[:records_out] += 1
194
+ @status[:records_out] += 1
197
195
  @status[:sequences_out] += 1
198
- @status[:residues_out] += entry.length
196
+ @status[:residues_out] += entry.length
199
197
 
200
198
  @lengths << entry.length
201
199
  end
@@ -212,7 +210,7 @@ module BioDSL
212
210
  @lengths.reverse!
213
211
 
214
212
  status[:contig_max] = @lengths.first || 0
215
- status[:contig_min] = @lengths.last || 0
213
+ status[:contig_min] = @lengths.last || 0
216
214
  status[:contig_n50] = 0
217
215
 
218
216
  count = 0
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -164,10 +164,10 @@ module BioDSL
164
164
 
165
165
  # Set the default option values.
166
166
  def defaults
167
- @options[:kmer_min] ||= 21
168
- @options[:kmer_max] ||= 49
167
+ @options[:kmer_min] ||= 21
168
+ @options[:kmer_max] ||= 49
169
169
  @options[:contig_min] ||= 500
170
- @options[:cpus] ||= 1
170
+ @options[:cpus] ||= 1
171
171
  end
172
172
 
173
173
  # Read all records from input and emit non-sequence records to the output
@@ -185,7 +185,7 @@ module BioDSL
185
185
  entry = BioDSL::Seq.new_bp(record)
186
186
 
187
187
  @status[:sequences_in] += 1
188
- @status[:residues_in] += entry.length
188
+ @status[:residues_in] += entry.length
189
189
 
190
190
  fasta_io.puts entry.to_fasta
191
191
  else
@@ -314,11 +314,11 @@ module BioDSL
314
314
  next if entry.length < @options[:contig_min]
315
315
 
316
316
  lengths << entry.length
317
- output << entry.to_bp
317
+ output << entry.to_bp
318
318
 
319
- @status[:records_out] += 1
319
+ @status[:records_out] += 1
320
320
  @status[:sequences_out] += 1
321
- @status[:residues_out] += entry.length
321
+ @status[:residues_out] += entry.length
322
322
  end
323
323
  end
324
324
 
@@ -333,11 +333,11 @@ module BioDSL
333
333
  @status[:kmer] = kmer
334
334
  @status[:paired] = @paired
335
335
 
336
- unless lengths.empty?
337
- @status[:contig_min] = lengths.min
338
- @status[:contig_max] = lengths.max
339
- @status[:n50] = calc_n50(lengths)
340
- end
336
+ return if lengths.empty?
337
+
338
+ @status[:contig_min] = lengths.min
339
+ @status[:contig_max] = lengths.max
340
+ @status[:n50] = calc_n50(lengths)
341
341
  end
342
342
 
343
343
  N50 = Struct.new(:kmer, :n50)
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -160,7 +160,7 @@ module BioDSL
160
160
  entry = BioDSL::Seq.new_bp(record)
161
161
 
162
162
  @status[:sequences_in] += 1
163
- @status[:residues_in] += entry.length
163
+ @status[:residues_in] += entry.length
164
164
 
165
165
  if entry.qual
166
166
  @type = :fastq
@@ -216,9 +216,9 @@ module BioDSL
216
216
  BioDSL::Fasta.open(output_file) do |ios|
217
217
  ios.each do |entry|
218
218
  output << entry.to_bp
219
- @status[:records_out] += 1
219
+ @status[:records_out] += 1
220
220
  @status[:sequences_out] += 1
221
- @status[:residues_out] += entry.length
221
+ @status[:residues_out] += entry.length
222
222
 
223
223
  @lengths << entry.length
224
224
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -185,12 +185,12 @@ module BioDSL
185
185
 
186
186
  # Set default options.
187
187
  def defaults
188
- @options[:prefix] ||= 'taxonomy'
188
+ @options[:prefix] ||= 'taxonomy'
189
189
  @options[:kmer_size] ||= 8
190
190
  @options[:step_size] ||= 1
191
- @options[:hits_max] ||= 50
191
+ @options[:hits_max] ||= 50
192
192
  @options[:consensus] ||= 0.51
193
- @options[:coverage] ||= 0.9
193
+ @options[:coverage] ||= 0.9
194
194
  @options[:best_only] = true if @options[:best_only].nil?
195
195
  end
196
196
 
@@ -200,14 +200,14 @@ module BioDSL
200
200
  # @param i [Fixnum] Record number,
201
201
  # @param search [BioDSL::Taxonomy::Search] Search object.
202
202
  def classify_seq(record, i, search)
203
- @status[:sequences_in] += 1
203
+ @status[:sequences_in] += 1
204
204
  @status[:sequences_out] += 1
205
- @status[:residues_in] += record[:SEQ].length
206
- @status[:residues_out] += record[:SEQ].length
205
+ @status[:residues_in] += record[:SEQ].length
206
+ @status[:residues_out] += record[:SEQ].length
207
207
  seq_name = record[:SEQ_NAME] || i.to_s
208
208
 
209
209
  result = search.execute(BioDSL::Seq.new(seq_name: seq_name,
210
- seq: record[:SEQ]))
210
+ seq: record[:SEQ]))
211
211
 
212
212
  record[:TAXONOMY] = result.taxonomy
213
213
  record[:TAXONOMY_HITS] = result.hits
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -128,7 +128,7 @@ module BioDSL
128
128
  # Set default options.
129
129
  def defaults
130
130
  @options[:confidence] ||= 80
131
- @options[:cpus] ||= 1
131
+ @options[:cpus] ||= 1
132
132
  end
133
133
 
134
134
  # Process input data and save sequences to a temporary file for
@@ -143,10 +143,10 @@ module BioDSL
143
143
  @status[:records_in] += 1
144
144
 
145
145
  if record[:SEQ]
146
- @status[:sequences_in] += 1
146
+ @status[:sequences_in] += 1
147
147
  @status[:sequences_out] += 1
148
- @status[:residues_in] += record[:SEQ].length
149
- @status[:records_out] += record[:SEQ].length
148
+ @status[:residues_in] += record[:SEQ].length
149
+ @status[:records_out] += record[:SEQ].length
150
150
  seq_name = record[:SEQ_NAME] || i.to_s
151
151
 
152
152
  entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -166,16 +166,16 @@ module BioDSL
166
166
 
167
167
  # Set default option values.
168
168
  def defaults
169
- @options[:mismatch_percent] ||= 0
169
+ @options[:mismatch_percent] ||= 0
170
170
  @options[:insertion_percent] ||= 0
171
- @options[:deletion_percent] ||= 0
171
+ @options[:deletion_percent] ||= 0
172
172
  end
173
173
 
174
174
  # Calculate the mismatch percentage.
175
175
  #
176
176
  # @return [Float] Mismatch percentage.
177
177
  def calc_mis
178
- (@primer.length * @options[:mismatch_percent] * 0.01).round
178
+ (@primer.length * @options[:mismatch_percent] * 0.01).round
179
179
  end
180
180
 
181
181
  # Calculate the insertion percentage.
@@ -189,7 +189,7 @@ module BioDSL
189
189
  #
190
190
  # @return [Float] Deletion percentage.
191
191
  def calc_del
192
- (@primer.length * @options[:deletion_percent] * 0.01).round
192
+ (@primer.length * @options[:deletion_percent] * 0.01).round
193
193
  end
194
194
 
195
195
  # Reset any previous clip_primer results from record.
@@ -207,7 +207,7 @@ module BioDSL
207
207
  entry = BioDSL::Seq.new_bp(record)
208
208
 
209
209
  @status[:sequences_in] += 1
210
- @status[:residues_in] += entry.length
210
+ @status[:residues_in] += entry.length
211
211
 
212
212
  case @options[:direction]
213
213
  when :forward then clip_primer_forward(record, entry)
@@ -217,7 +217,7 @@ module BioDSL
217
217
  end
218
218
 
219
219
  @status[:sequences_out] += 1
220
- @status[:residues_out] += entry.length
220
+ @status[:residues_out] += entry.length
221
221
  end
222
222
 
223
223
  # Clip forward primer from entry and save clip information
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -89,8 +89,8 @@ module BioDSL
89
89
  process_input(input, output, tmp_in)
90
90
 
91
91
  BioDSL::Usearch.cluster_otus(input: tmp_in, output: tmp_out,
92
- identity: @options[:identity],
93
- verbose: @options[:verbose])
92
+ identity: @options[:identity],
93
+ verbose: @options[:verbose])
94
94
 
95
95
  process_output(output, tmp_out)
96
96
  end
@@ -172,8 +172,8 @@ module BioDSL
172
172
 
173
173
  output << record
174
174
  @status[:sequences_out] += 1
175
- @status[:residues_out] += record[:SEQ].length
176
- @status[:records_out] += 1
175
+ @status[:residues_out] += record[:SEQ].length
176
+ @status[:records_out] += 1
177
177
  end
178
178
  end
179
179
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -162,7 +162,7 @@ module BioDSL
162
162
  def write_tax(hash, output)
163
163
  hash.each_value do |record|
164
164
  output << record
165
- @status[:otus_out] += 1
165
+ @status[:otus_out] += 1
166
166
  @status[:records_out] += 1
167
167
  end
168
168
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -142,7 +142,7 @@ module BioDSL
142
142
 
143
143
  output << record
144
144
 
145
- @status[:hits_out] += 1
145
+ @status[:hits_out] += 1
146
146
  @status[:records_out] += 1
147
147
  end
148
148
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -106,10 +106,10 @@ module BioDSL
106
106
  entry.type = @type
107
107
  entry.complement!
108
108
 
109
- @status[:sequences_in] += 1
109
+ @status[:sequences_in] += 1
110
110
  @status[:sequences_out] += 1
111
- @status[:residues_in] += entry.length
112
- @status[:residues_out] += entry.length
111
+ @status[:residues_in] += entry.length
112
+ @status[:residues_out] += entry.length
113
113
 
114
114
  record.merge! entry.to_bp
115
115
  end
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -74,7 +74,7 @@ module BioDSL
74
74
  #
75
75
  # @return [CountValues] Instance of class.
76
76
  def initialize(options)
77
- @options = options
77
+ @options = options
78
78
 
79
79
  check_options
80
80
 
@@ -21,7 +21,7 @@
21
21
  # #
22
22
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
23
23
  # #
24
- # This software is part of the BioDSL framework (www.BioDSL.org). #
24
+ # This software is part of the BioDSL (www.BioDSL.org). #
25
25
  # #
26
26
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
27
27
 
@@ -68,7 +68,6 @@ module BioDSL
68
68
  # {:SEQ_NAME=>"test1", :SEQ=>"A-GTC", :SEQ_LEN=>5}
69
69
  # {:SEQ_NAME=>"test2", :SEQ=>"AGGTC", :SEQ_LEN=>5}
70
70
  #
71
- # rubocop:disable ClassLength
72
71
  class DegapSeq
73
72
  require 'narray'
74
73
 
@@ -157,14 +156,14 @@ module BioDSL
157
156
  # @param seq [String] Sequences.
158
157
  def mask_add(seq)
159
158
  @status[:sequences_in] += 1
160
- @status[:residues_in] += seq.length
159
+ @status[:residues_in] += seq.length
161
160
 
162
161
  @max_len ||= seq.length
163
162
 
164
163
  check_length(seq)
165
164
 
166
165
  @na_mask ||= NArray.int(seq.length)
167
- na_seq = NArray.to_na(seq, 'byte')
166
+ na_seq = NArray.to_na(seq, 'byte')
168
167
  @indels.each_char { |c| @na_mask += na_seq.eq(c.ord) }
169
168
  end
170
169
 
@@ -212,7 +211,7 @@ module BioDSL
212
211
  record[:SEQ_LEN] = record[:SEQ].length
213
212
 
214
213
  @status[:sequences_out] += 1
215
- @status[:residues_out] += record[:SEQ].length
214
+ @status[:residues_out] += record[:SEQ].length
216
215
  end
217
216
 
218
217
  # Remove all gaps from all sequences in input stream and output to output
@@ -240,12 +239,12 @@ module BioDSL
240
239
  entry = BioDSL::Seq.new_bp(record)
241
240
 
242
241
  @status[:sequences_in] += 1
243
- @status[:residues_in] += entry.length
242
+ @status[:residues_in] += entry.length
244
243
 
245
244
  entry.seq.delete!(@indels)
246
245
 
247
246
  @status[:sequences_out] += 1
248
- @status[:residues_out] += entry.length
247
+ @status[:residues_out] += entry.length
249
248
 
250
249
  record.merge! entry.to_bp
251
250
  end