BioDSL 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of BioDSL (
|
24
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -143,7 +143,7 @@ module BioDSL
|
|
143
143
|
case
|
144
144
|
when @options[:first] && @pair then read_first_pair(output)
|
145
145
|
when @options[:first] then read_first_single(output)
|
146
|
-
when @options[:last]
|
146
|
+
when @options[:last] && @pair then read_last_pair(output)
|
147
147
|
when @options[:last] then read_last_single(output)
|
148
148
|
when @pair then read_all_pair(output)
|
149
149
|
else
|
@@ -176,12 +176,12 @@ module BioDSL
|
|
176
176
|
return unless input
|
177
177
|
|
178
178
|
input.each do |record|
|
179
|
-
@status[:records_in]
|
179
|
+
@status[:records_in] += 1
|
180
180
|
@status[:records_out] += 1
|
181
181
|
|
182
182
|
if (seq = record[:SEQ])
|
183
183
|
@status[:sequences_in] += 1
|
184
|
-
@status[:residues_in]
|
184
|
+
@status[:residues_in] += seq.length
|
185
185
|
end
|
186
186
|
|
187
187
|
output << record
|
@@ -197,10 +197,10 @@ module BioDSL
|
|
197
197
|
ios.each do |entry|
|
198
198
|
check_entry(entry)
|
199
199
|
output << entry.to_bp
|
200
|
-
@status[:records_out]
|
200
|
+
@status[:records_out] += 1
|
201
201
|
@status[:sequences_out] += 1
|
202
|
-
@status[:residues_out]
|
203
|
-
|
202
|
+
@status[:residues_out] += entry.length
|
203
|
+
break if @status[:sequences_out] >= @options[:first]
|
204
204
|
end
|
205
205
|
end
|
206
206
|
end
|
@@ -220,10 +220,10 @@ module BioDSL
|
|
220
220
|
reverse_complement(entry2) if @options[:reverse_complement]
|
221
221
|
output << entry1.to_bp
|
222
222
|
output << entry2.to_bp
|
223
|
-
@status[:records_out]
|
223
|
+
@status[:records_out] += 2
|
224
224
|
@status[:sequences_out] += 2
|
225
|
-
@status[:residues_out]
|
226
|
-
|
225
|
+
@status[:residues_out] += entry1.length + entry2.length
|
226
|
+
break if @status[:sequences_out] >= @options[:first]
|
227
227
|
end
|
228
228
|
end
|
229
229
|
end
|
@@ -279,9 +279,9 @@ module BioDSL
|
|
279
279
|
ios.each do |entry|
|
280
280
|
check_entry(entry)
|
281
281
|
output << entry.to_bp
|
282
|
-
@status[:records_out]
|
282
|
+
@status[:records_out] += 1
|
283
283
|
@status[:sequences_out] += 1
|
284
|
-
@status[:residues_out]
|
284
|
+
@status[:residues_out] += entry.length
|
285
285
|
end
|
286
286
|
end
|
287
287
|
end
|
@@ -299,9 +299,9 @@ module BioDSL
|
|
299
299
|
reverse_complement(entry2) if @options[:reverse_complement]
|
300
300
|
output << entry1.to_bp
|
301
301
|
output << entry2.to_bp
|
302
|
-
@status[:records_out]
|
302
|
+
@status[:records_out] += 2
|
303
303
|
@status[:sequences_out] += 2
|
304
|
-
@status[:residues_out]
|
304
|
+
@status[:residues_out] += entry1.length + entry2.length
|
305
305
|
end
|
306
306
|
end
|
307
307
|
end
|
@@ -405,9 +405,9 @@ module BioDSL
|
|
405
405
|
@buffer.each do |entry|
|
406
406
|
output << entry.to_bp
|
407
407
|
|
408
|
-
@status[:records_out]
|
408
|
+
@status[:records_out] += 1
|
409
409
|
@status[:sequences_out] += 1
|
410
|
-
@status[:residues_out]
|
410
|
+
@status[:residues_out] += entry.length
|
411
411
|
end
|
412
412
|
end
|
413
413
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -173,7 +173,6 @@ module BioDSL
|
|
173
173
|
# {:Organism=>"Mouse"}
|
174
174
|
# {:Organism=>"Cat"}
|
175
175
|
#
|
176
|
-
# rubocop: disable ClassLength
|
177
176
|
class ReadTable
|
178
177
|
STATS = %i(records_in records_out)
|
179
178
|
|
@@ -321,7 +320,7 @@ module BioDSL
|
|
321
320
|
return unless output
|
322
321
|
input.each do |record|
|
323
322
|
output << record
|
324
|
-
@status[:records_in]
|
323
|
+
@status[:records_in] += 1
|
325
324
|
@status[:records_out] += 1
|
326
325
|
end
|
327
326
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -102,10 +102,10 @@ module BioDSL
|
|
102
102
|
entry = BioDSL::Seq.new_bp(record)
|
103
103
|
entry.reverse!
|
104
104
|
|
105
|
-
@status[:sequences_in]
|
105
|
+
@status[:sequences_in] += 1
|
106
106
|
@status[:sequences_out] += 1
|
107
|
-
@status[:residues_in]
|
108
|
-
@status[:residues_out]
|
107
|
+
@status[:residues_in] += entry.length
|
108
|
+
@status[:residues_out] += entry.length
|
109
109
|
|
110
110
|
record.merge! entry.to_bp
|
111
111
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -231,7 +231,7 @@ module BioDSL
|
|
231
231
|
def defaults
|
232
232
|
@max_mis = @options[:max_mismatches] || 2
|
233
233
|
@max_ins = @options[:max_insertions] || 1
|
234
|
-
@max_del = @options[:max_deletions]
|
234
|
+
@max_del = @options[:max_deletions] || 1
|
235
235
|
end
|
236
236
|
|
237
237
|
# Parse FASTA file with one gapped template sequence if specified.
|
@@ -312,7 +312,7 @@ module BioDSL
|
|
312
312
|
entry = BioDSL::Seq.new_bp(record)
|
313
313
|
|
314
314
|
@status[:sequences_in] += 1
|
315
|
-
@status[:residues_in]
|
315
|
+
@status[:residues_in] += entry.length
|
316
316
|
|
317
317
|
setup_slice(entry) unless @slice
|
318
318
|
|
@@ -321,7 +321,7 @@ module BioDSL
|
|
321
321
|
record.merge! entry.to_bp
|
322
322
|
|
323
323
|
@status[:sequences_out] += 1
|
324
|
-
@status[:residues_out]
|
324
|
+
@status[:residues_out] += entry.length
|
325
325
|
end
|
326
326
|
|
327
327
|
# Usings primers to locate slice positions in entry.
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -138,12 +138,12 @@ module BioDSL
|
|
138
138
|
entry = BioDSL::Seq.new_bp(record)
|
139
139
|
|
140
140
|
@status[:sequences_in] += 1
|
141
|
-
@status[:residues_in]
|
141
|
+
@status[:residues_in] += entry.length
|
142
142
|
|
143
143
|
entry = entry[@options[:slice]]
|
144
144
|
|
145
145
|
@status[:sequences_out] += 1
|
146
|
-
@status[:residues_out]
|
146
|
+
@status[:residues_out] += entry.length
|
147
147
|
|
148
148
|
record.merge! entry.to_bp
|
149
149
|
end
|
data/lib/BioDSL/commands/sort.rb
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -146,12 +146,11 @@ module BioDSL
|
|
146
146
|
# @param output [Enumerator::Yielder] Output stream.
|
147
147
|
# @param record [Hash] BioDSL record.
|
148
148
|
#
|
149
|
-
# rubocop: disable Metrics/AbcSize
|
150
149
|
def split_pair_seq(output, record)
|
151
150
|
entry = BioDSL::Seq.new_bp(record)
|
152
151
|
|
153
152
|
@status[:sequences_in] += 1
|
154
|
-
@status[:residues_in]
|
153
|
+
@status[:residues_in] += entry.length
|
155
154
|
|
156
155
|
pos = get_split_pos(record, entry)
|
157
156
|
|
@@ -161,13 +160,13 @@ module BioDSL
|
|
161
160
|
output << entry2.to_bp
|
162
161
|
|
163
162
|
@status[:sequences_out] += 2
|
164
|
-
@status[:residues_out]
|
165
|
-
@status[:records_out]
|
163
|
+
@status[:residues_out] += entry1.length + entry2.length
|
164
|
+
@status[:records_out] += 2
|
166
165
|
end
|
167
166
|
|
168
167
|
# Given a record locate the sequence split position.
|
169
168
|
#
|
170
|
-
# @param record [Hash]
|
169
|
+
# @param record [Hash] BioDSL record.
|
171
170
|
# @param entry [BioDSL::Seq] Sequence entry.
|
172
171
|
#
|
173
172
|
# @return [Integer] Sequence split position.
|
@@ -210,7 +209,7 @@ module BioDSL
|
|
210
209
|
def fix_seq_names(entry1, entry2)
|
211
210
|
if entry1.seq_name =~ /^[^ ]+ \d:/
|
212
211
|
entry2.seq_name.sub!(/ \d:/, ' 2:')
|
213
|
-
elsif entry1.seq_name =~
|
212
|
+
elsif entry1.seq_name =~ %r{^.+\/\d$}
|
214
213
|
entry2.seq_name[-1] = '2'
|
215
214
|
else
|
216
215
|
fail "Could not match sequence name: #{entry1.seq_name}"
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
82
82
|
#
|
83
83
|
# @return [SplitValues] Class instance.
|
84
84
|
def initialize(options)
|
85
|
-
@options
|
85
|
+
@options = options
|
86
86
|
|
87
87
|
check_options
|
88
88
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -131,10 +131,10 @@ module BioDSL
|
|
131
131
|
# @return [TrimPrimer] Class instance.
|
132
132
|
def initialize(options)
|
133
133
|
@options = options
|
134
|
-
@options[:overlap_min]
|
135
|
-
@options[:mismatch_percent]
|
134
|
+
@options[:overlap_min] ||= 1
|
135
|
+
@options[:mismatch_percent] ||= 0
|
136
136
|
@options[:insertion_percent] ||= 0
|
137
|
-
@options[:deletion_percent]
|
137
|
+
@options[:deletion_percent] ||= 0
|
138
138
|
@pattern = pattern
|
139
139
|
@hit = false
|
140
140
|
|
@@ -153,6 +153,7 @@ module BioDSL
|
|
153
153
|
|
154
154
|
if record[:SEQ] && record[:SEQ].length > 0
|
155
155
|
@status[:sequences_in] += 1
|
156
|
+
@status[:sequences_out] += 1
|
156
157
|
|
157
158
|
case @options[:direction]
|
158
159
|
when :forward then trim_forward(record)
|
@@ -198,7 +199,7 @@ module BioDSL
|
|
198
199
|
def trim_forward(record)
|
199
200
|
entry = BioDSL::Seq.new_bp(record)
|
200
201
|
|
201
|
-
@status[:residues_in]
|
202
|
+
@status[:residues_in] += entry.length
|
202
203
|
|
203
204
|
while @pattern.length >= @options[:overlap_min]
|
204
205
|
if (match = match_forward(entry))
|
@@ -235,6 +236,8 @@ module BioDSL
|
|
235
236
|
def merge_forward(record, entry, match)
|
236
237
|
entry = entry[match.pos + match.length..-1]
|
237
238
|
|
239
|
+
@status[:residues_out] += entry.length
|
240
|
+
|
238
241
|
record.merge!(entry.to_bp)
|
239
242
|
record[:TRIM_PRIMER_DIR] = 'FORWARD'
|
240
243
|
record[:TRIM_PRIMER_POS] = match.pos
|
@@ -248,7 +251,7 @@ module BioDSL
|
|
248
251
|
def trim_reverse(record)
|
249
252
|
entry = BioDSL::Seq.new_bp(record)
|
250
253
|
|
251
|
-
@status[:residues_in]
|
254
|
+
@status[:residues_in] += entry.length
|
252
255
|
|
253
256
|
while @pattern.length >= @options[:overlap_min]
|
254
257
|
if (match = match_reverse(entry))
|
@@ -288,6 +291,8 @@ module BioDSL
|
|
288
291
|
def merge_reverse(record, entry, match)
|
289
292
|
entry = entry[0...match.pos]
|
290
293
|
|
294
|
+
@status[:residues_out] += entry.length
|
295
|
+
|
291
296
|
record.merge!(entry.to_bp)
|
292
297
|
record[:TRIM_PRIMER_DIR] = 'REVERSE'
|
293
298
|
record[:TRIM_PRIMER_POS] = match.pos
|
@@ -302,9 +307,9 @@ module BioDSL
|
|
302
307
|
#
|
303
308
|
# @return [Hash] Match options hash.
|
304
309
|
def match_options(length)
|
305
|
-
mis = (length * @options[:mismatch_percent]
|
310
|
+
mis = (length * @options[:mismatch_percent] * 0.01).round
|
306
311
|
ins = (length * @options[:insertion_percent] * 0.01).round
|
307
|
-
del = (length * @options[:deletion_percent]
|
312
|
+
del = (length * @options[:deletion_percent] * 0.01).round
|
308
313
|
|
309
314
|
{max_mismatches: mis,
|
310
315
|
max_insertions: ins,
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -164,8 +164,8 @@ module BioDSL
|
|
164
164
|
# Set defaul options.
|
165
165
|
def defaults
|
166
166
|
@options[:quality_min] ||= 20
|
167
|
-
@options[:mode]
|
168
|
-
@options[:length_min]
|
167
|
+
@options[:mode] ||= :both
|
168
|
+
@options[:length_min] ||= 3
|
169
169
|
end
|
170
170
|
|
171
171
|
# Trim sequence in a given record with sequence info.
|
@@ -175,7 +175,7 @@ module BioDSL
|
|
175
175
|
entry = BioDSL::Seq.new_bp(record)
|
176
176
|
|
177
177
|
@status[:sequences_in] += 1
|
178
|
-
@status[:residues_in]
|
178
|
+
@status[:residues_in] += entry.length
|
179
179
|
|
180
180
|
case @mode
|
181
181
|
when :both then entry.quality_trim!(@min, @len)
|
@@ -184,7 +184,7 @@ module BioDSL
|
|
184
184
|
end
|
185
185
|
|
186
186
|
@status[:sequences_out] += 1
|
187
|
-
@status[:residues_out]
|
187
|
+
@status[:residues_out] += entry.length
|
188
188
|
|
189
189
|
record.merge! entry.to_bp
|
190
190
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -71,8 +71,8 @@ module BioDSL
|
|
71
71
|
@options = options
|
72
72
|
aux_exist('usearch')
|
73
73
|
check_options
|
74
|
-
@options[:cpus]
|
75
|
-
@options[:strand] ||= 'plus'
|
74
|
+
@options[:cpus] ||= 1
|
75
|
+
@options[:strand] ||= 'plus' # This option cant be changed in usearch7.0
|
76
76
|
end
|
77
77
|
|
78
78
|
# Return command lambda for uchime_ref.
|
@@ -115,7 +115,7 @@ module BioDSL
|
|
115
115
|
|
116
116
|
if record[:SEQ]
|
117
117
|
@status[:sequences_in] += 1
|
118
|
-
@status[:residues_in]
|
118
|
+
@status[:residues_in] += record[:SEQ].length
|
119
119
|
seq_name = record[:SEQ_NAME] || i.to_s
|
120
120
|
|
121
121
|
entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
|
@@ -161,8 +161,8 @@ module BioDSL
|
|
161
161
|
|
162
162
|
output << record
|
163
163
|
@status[:sequences_out] += 1
|
164
|
-
@status[:residues_out]
|
165
|
-
@status[:records_out]
|
164
|
+
@status[:residues_out] += entry.length
|
165
|
+
@status[:records_out] += 1
|
166
166
|
end
|
167
167
|
end
|
168
168
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -232,9 +232,9 @@ module BioDSL
|
|
232
232
|
record.merge!(entry.to_bp)
|
233
233
|
|
234
234
|
output << record
|
235
|
-
@status[:records_out]
|
235
|
+
@status[:records_out] += 1
|
236
236
|
@status[:sequences_out] += 1
|
237
|
-
@status[:residues_out]
|
237
|
+
@status[:residues_out] += entry.length
|
238
238
|
end
|
239
239
|
end
|
240
240
|
end
|
@@ -273,9 +273,9 @@ module BioDSL
|
|
273
273
|
|
274
274
|
if (r = results[record[:SEQ_NAME]])
|
275
275
|
output << record.merge(r)
|
276
|
-
@status[:records_out]
|
276
|
+
@status[:records_out] += 1
|
277
277
|
@status[:sequences_out] += 1
|
278
|
-
@status[:residues_out]
|
278
|
+
@status[:residues_out] += record[:SEQ].length
|
279
279
|
else
|
280
280
|
fail BioDSL::UsearchError, 'Sequence name: ' \
|
281
281
|
"#{record[:SEQ_NAME]} not found in uclust results"
|