BioDSL 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of BioDSL (
|
|
24
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -143,7 +143,7 @@ module BioDSL
|
|
|
143
143
|
case
|
|
144
144
|
when @options[:first] && @pair then read_first_pair(output)
|
|
145
145
|
when @options[:first] then read_first_single(output)
|
|
146
|
-
when @options[:last]
|
|
146
|
+
when @options[:last] && @pair then read_last_pair(output)
|
|
147
147
|
when @options[:last] then read_last_single(output)
|
|
148
148
|
when @pair then read_all_pair(output)
|
|
149
149
|
else
|
|
@@ -176,12 +176,12 @@ module BioDSL
|
|
|
176
176
|
return unless input
|
|
177
177
|
|
|
178
178
|
input.each do |record|
|
|
179
|
-
@status[:records_in]
|
|
179
|
+
@status[:records_in] += 1
|
|
180
180
|
@status[:records_out] += 1
|
|
181
181
|
|
|
182
182
|
if (seq = record[:SEQ])
|
|
183
183
|
@status[:sequences_in] += 1
|
|
184
|
-
@status[:residues_in]
|
|
184
|
+
@status[:residues_in] += seq.length
|
|
185
185
|
end
|
|
186
186
|
|
|
187
187
|
output << record
|
|
@@ -197,10 +197,10 @@ module BioDSL
|
|
|
197
197
|
ios.each do |entry|
|
|
198
198
|
check_entry(entry)
|
|
199
199
|
output << entry.to_bp
|
|
200
|
-
@status[:records_out]
|
|
200
|
+
@status[:records_out] += 1
|
|
201
201
|
@status[:sequences_out] += 1
|
|
202
|
-
@status[:residues_out]
|
|
203
|
-
|
|
202
|
+
@status[:residues_out] += entry.length
|
|
203
|
+
break if @status[:sequences_out] >= @options[:first]
|
|
204
204
|
end
|
|
205
205
|
end
|
|
206
206
|
end
|
|
@@ -220,10 +220,10 @@ module BioDSL
|
|
|
220
220
|
reverse_complement(entry2) if @options[:reverse_complement]
|
|
221
221
|
output << entry1.to_bp
|
|
222
222
|
output << entry2.to_bp
|
|
223
|
-
@status[:records_out]
|
|
223
|
+
@status[:records_out] += 2
|
|
224
224
|
@status[:sequences_out] += 2
|
|
225
|
-
@status[:residues_out]
|
|
226
|
-
|
|
225
|
+
@status[:residues_out] += entry1.length + entry2.length
|
|
226
|
+
break if @status[:sequences_out] >= @options[:first]
|
|
227
227
|
end
|
|
228
228
|
end
|
|
229
229
|
end
|
|
@@ -279,9 +279,9 @@ module BioDSL
|
|
|
279
279
|
ios.each do |entry|
|
|
280
280
|
check_entry(entry)
|
|
281
281
|
output << entry.to_bp
|
|
282
|
-
@status[:records_out]
|
|
282
|
+
@status[:records_out] += 1
|
|
283
283
|
@status[:sequences_out] += 1
|
|
284
|
-
@status[:residues_out]
|
|
284
|
+
@status[:residues_out] += entry.length
|
|
285
285
|
end
|
|
286
286
|
end
|
|
287
287
|
end
|
|
@@ -299,9 +299,9 @@ module BioDSL
|
|
|
299
299
|
reverse_complement(entry2) if @options[:reverse_complement]
|
|
300
300
|
output << entry1.to_bp
|
|
301
301
|
output << entry2.to_bp
|
|
302
|
-
@status[:records_out]
|
|
302
|
+
@status[:records_out] += 2
|
|
303
303
|
@status[:sequences_out] += 2
|
|
304
|
-
@status[:residues_out]
|
|
304
|
+
@status[:residues_out] += entry1.length + entry2.length
|
|
305
305
|
end
|
|
306
306
|
end
|
|
307
307
|
end
|
|
@@ -405,9 +405,9 @@ module BioDSL
|
|
|
405
405
|
@buffer.each do |entry|
|
|
406
406
|
output << entry.to_bp
|
|
407
407
|
|
|
408
|
-
@status[:records_out]
|
|
408
|
+
@status[:records_out] += 1
|
|
409
409
|
@status[:sequences_out] += 1
|
|
410
|
-
@status[:residues_out]
|
|
410
|
+
@status[:residues_out] += entry.length
|
|
411
411
|
end
|
|
412
412
|
end
|
|
413
413
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -173,7 +173,6 @@ module BioDSL
|
|
|
173
173
|
# {:Organism=>"Mouse"}
|
|
174
174
|
# {:Organism=>"Cat"}
|
|
175
175
|
#
|
|
176
|
-
# rubocop: disable ClassLength
|
|
177
176
|
class ReadTable
|
|
178
177
|
STATS = %i(records_in records_out)
|
|
179
178
|
|
|
@@ -321,7 +320,7 @@ module BioDSL
|
|
|
321
320
|
return unless output
|
|
322
321
|
input.each do |record|
|
|
323
322
|
output << record
|
|
324
|
-
@status[:records_in]
|
|
323
|
+
@status[:records_in] += 1
|
|
325
324
|
@status[:records_out] += 1
|
|
326
325
|
end
|
|
327
326
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -102,10 +102,10 @@ module BioDSL
|
|
|
102
102
|
entry = BioDSL::Seq.new_bp(record)
|
|
103
103
|
entry.reverse!
|
|
104
104
|
|
|
105
|
-
@status[:sequences_in]
|
|
105
|
+
@status[:sequences_in] += 1
|
|
106
106
|
@status[:sequences_out] += 1
|
|
107
|
-
@status[:residues_in]
|
|
108
|
-
@status[:residues_out]
|
|
107
|
+
@status[:residues_in] += entry.length
|
|
108
|
+
@status[:residues_out] += entry.length
|
|
109
109
|
|
|
110
110
|
record.merge! entry.to_bp
|
|
111
111
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -231,7 +231,7 @@ module BioDSL
|
|
|
231
231
|
def defaults
|
|
232
232
|
@max_mis = @options[:max_mismatches] || 2
|
|
233
233
|
@max_ins = @options[:max_insertions] || 1
|
|
234
|
-
@max_del = @options[:max_deletions]
|
|
234
|
+
@max_del = @options[:max_deletions] || 1
|
|
235
235
|
end
|
|
236
236
|
|
|
237
237
|
# Parse FASTA file with one gapped template sequence if specified.
|
|
@@ -312,7 +312,7 @@ module BioDSL
|
|
|
312
312
|
entry = BioDSL::Seq.new_bp(record)
|
|
313
313
|
|
|
314
314
|
@status[:sequences_in] += 1
|
|
315
|
-
@status[:residues_in]
|
|
315
|
+
@status[:residues_in] += entry.length
|
|
316
316
|
|
|
317
317
|
setup_slice(entry) unless @slice
|
|
318
318
|
|
|
@@ -321,7 +321,7 @@ module BioDSL
|
|
|
321
321
|
record.merge! entry.to_bp
|
|
322
322
|
|
|
323
323
|
@status[:sequences_out] += 1
|
|
324
|
-
@status[:residues_out]
|
|
324
|
+
@status[:residues_out] += entry.length
|
|
325
325
|
end
|
|
326
326
|
|
|
327
327
|
# Usings primers to locate slice positions in entry.
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -138,12 +138,12 @@ module BioDSL
|
|
|
138
138
|
entry = BioDSL::Seq.new_bp(record)
|
|
139
139
|
|
|
140
140
|
@status[:sequences_in] += 1
|
|
141
|
-
@status[:residues_in]
|
|
141
|
+
@status[:residues_in] += entry.length
|
|
142
142
|
|
|
143
143
|
entry = entry[@options[:slice]]
|
|
144
144
|
|
|
145
145
|
@status[:sequences_out] += 1
|
|
146
|
-
@status[:residues_out]
|
|
146
|
+
@status[:residues_out] += entry.length
|
|
147
147
|
|
|
148
148
|
record.merge! entry.to_bp
|
|
149
149
|
end
|
data/lib/BioDSL/commands/sort.rb
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -146,12 +146,11 @@ module BioDSL
|
|
|
146
146
|
# @param output [Enumerator::Yielder] Output stream.
|
|
147
147
|
# @param record [Hash] BioDSL record.
|
|
148
148
|
#
|
|
149
|
-
# rubocop: disable Metrics/AbcSize
|
|
150
149
|
def split_pair_seq(output, record)
|
|
151
150
|
entry = BioDSL::Seq.new_bp(record)
|
|
152
151
|
|
|
153
152
|
@status[:sequences_in] += 1
|
|
154
|
-
@status[:residues_in]
|
|
153
|
+
@status[:residues_in] += entry.length
|
|
155
154
|
|
|
156
155
|
pos = get_split_pos(record, entry)
|
|
157
156
|
|
|
@@ -161,13 +160,13 @@ module BioDSL
|
|
|
161
160
|
output << entry2.to_bp
|
|
162
161
|
|
|
163
162
|
@status[:sequences_out] += 2
|
|
164
|
-
@status[:residues_out]
|
|
165
|
-
@status[:records_out]
|
|
163
|
+
@status[:residues_out] += entry1.length + entry2.length
|
|
164
|
+
@status[:records_out] += 2
|
|
166
165
|
end
|
|
167
166
|
|
|
168
167
|
# Given a record locate the sequence split position.
|
|
169
168
|
#
|
|
170
|
-
# @param record [Hash]
|
|
169
|
+
# @param record [Hash] BioDSL record.
|
|
171
170
|
# @param entry [BioDSL::Seq] Sequence entry.
|
|
172
171
|
#
|
|
173
172
|
# @return [Integer] Sequence split position.
|
|
@@ -210,7 +209,7 @@ module BioDSL
|
|
|
210
209
|
def fix_seq_names(entry1, entry2)
|
|
211
210
|
if entry1.seq_name =~ /^[^ ]+ \d:/
|
|
212
211
|
entry2.seq_name.sub!(/ \d:/, ' 2:')
|
|
213
|
-
elsif entry1.seq_name =~
|
|
212
|
+
elsif entry1.seq_name =~ %r{^.+\/\d$}
|
|
214
213
|
entry2.seq_name[-1] = '2'
|
|
215
214
|
else
|
|
216
215
|
fail "Could not match sequence name: #{entry1.seq_name}"
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -82,7 +82,7 @@ module BioDSL
|
|
|
82
82
|
#
|
|
83
83
|
# @return [SplitValues] Class instance.
|
|
84
84
|
def initialize(options)
|
|
85
|
-
@options
|
|
85
|
+
@options = options
|
|
86
86
|
|
|
87
87
|
check_options
|
|
88
88
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -131,10 +131,10 @@ module BioDSL
|
|
|
131
131
|
# @return [TrimPrimer] Class instance.
|
|
132
132
|
def initialize(options)
|
|
133
133
|
@options = options
|
|
134
|
-
@options[:overlap_min]
|
|
135
|
-
@options[:mismatch_percent]
|
|
134
|
+
@options[:overlap_min] ||= 1
|
|
135
|
+
@options[:mismatch_percent] ||= 0
|
|
136
136
|
@options[:insertion_percent] ||= 0
|
|
137
|
-
@options[:deletion_percent]
|
|
137
|
+
@options[:deletion_percent] ||= 0
|
|
138
138
|
@pattern = pattern
|
|
139
139
|
@hit = false
|
|
140
140
|
|
|
@@ -153,6 +153,7 @@ module BioDSL
|
|
|
153
153
|
|
|
154
154
|
if record[:SEQ] && record[:SEQ].length > 0
|
|
155
155
|
@status[:sequences_in] += 1
|
|
156
|
+
@status[:sequences_out] += 1
|
|
156
157
|
|
|
157
158
|
case @options[:direction]
|
|
158
159
|
when :forward then trim_forward(record)
|
|
@@ -198,7 +199,7 @@ module BioDSL
|
|
|
198
199
|
def trim_forward(record)
|
|
199
200
|
entry = BioDSL::Seq.new_bp(record)
|
|
200
201
|
|
|
201
|
-
@status[:residues_in]
|
|
202
|
+
@status[:residues_in] += entry.length
|
|
202
203
|
|
|
203
204
|
while @pattern.length >= @options[:overlap_min]
|
|
204
205
|
if (match = match_forward(entry))
|
|
@@ -235,6 +236,8 @@ module BioDSL
|
|
|
235
236
|
def merge_forward(record, entry, match)
|
|
236
237
|
entry = entry[match.pos + match.length..-1]
|
|
237
238
|
|
|
239
|
+
@status[:residues_out] += entry.length
|
|
240
|
+
|
|
238
241
|
record.merge!(entry.to_bp)
|
|
239
242
|
record[:TRIM_PRIMER_DIR] = 'FORWARD'
|
|
240
243
|
record[:TRIM_PRIMER_POS] = match.pos
|
|
@@ -248,7 +251,7 @@ module BioDSL
|
|
|
248
251
|
def trim_reverse(record)
|
|
249
252
|
entry = BioDSL::Seq.new_bp(record)
|
|
250
253
|
|
|
251
|
-
@status[:residues_in]
|
|
254
|
+
@status[:residues_in] += entry.length
|
|
252
255
|
|
|
253
256
|
while @pattern.length >= @options[:overlap_min]
|
|
254
257
|
if (match = match_reverse(entry))
|
|
@@ -288,6 +291,8 @@ module BioDSL
|
|
|
288
291
|
def merge_reverse(record, entry, match)
|
|
289
292
|
entry = entry[0...match.pos]
|
|
290
293
|
|
|
294
|
+
@status[:residues_out] += entry.length
|
|
295
|
+
|
|
291
296
|
record.merge!(entry.to_bp)
|
|
292
297
|
record[:TRIM_PRIMER_DIR] = 'REVERSE'
|
|
293
298
|
record[:TRIM_PRIMER_POS] = match.pos
|
|
@@ -302,9 +307,9 @@ module BioDSL
|
|
|
302
307
|
#
|
|
303
308
|
# @return [Hash] Match options hash.
|
|
304
309
|
def match_options(length)
|
|
305
|
-
mis = (length * @options[:mismatch_percent]
|
|
310
|
+
mis = (length * @options[:mismatch_percent] * 0.01).round
|
|
306
311
|
ins = (length * @options[:insertion_percent] * 0.01).round
|
|
307
|
-
del = (length * @options[:deletion_percent]
|
|
312
|
+
del = (length * @options[:deletion_percent] * 0.01).round
|
|
308
313
|
|
|
309
314
|
{max_mismatches: mis,
|
|
310
315
|
max_insertions: ins,
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -164,8 +164,8 @@ module BioDSL
|
|
|
164
164
|
# Set defaul options.
|
|
165
165
|
def defaults
|
|
166
166
|
@options[:quality_min] ||= 20
|
|
167
|
-
@options[:mode]
|
|
168
|
-
@options[:length_min]
|
|
167
|
+
@options[:mode] ||= :both
|
|
168
|
+
@options[:length_min] ||= 3
|
|
169
169
|
end
|
|
170
170
|
|
|
171
171
|
# Trim sequence in a given record with sequence info.
|
|
@@ -175,7 +175,7 @@ module BioDSL
|
|
|
175
175
|
entry = BioDSL::Seq.new_bp(record)
|
|
176
176
|
|
|
177
177
|
@status[:sequences_in] += 1
|
|
178
|
-
@status[:residues_in]
|
|
178
|
+
@status[:residues_in] += entry.length
|
|
179
179
|
|
|
180
180
|
case @mode
|
|
181
181
|
when :both then entry.quality_trim!(@min, @len)
|
|
@@ -184,7 +184,7 @@ module BioDSL
|
|
|
184
184
|
end
|
|
185
185
|
|
|
186
186
|
@status[:sequences_out] += 1
|
|
187
|
-
@status[:residues_out]
|
|
187
|
+
@status[:residues_out] += entry.length
|
|
188
188
|
|
|
189
189
|
record.merge! entry.to_bp
|
|
190
190
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -71,8 +71,8 @@ module BioDSL
|
|
|
71
71
|
@options = options
|
|
72
72
|
aux_exist('usearch')
|
|
73
73
|
check_options
|
|
74
|
-
@options[:cpus]
|
|
75
|
-
@options[:strand] ||= 'plus'
|
|
74
|
+
@options[:cpus] ||= 1
|
|
75
|
+
@options[:strand] ||= 'plus' # This option cant be changed in usearch7.0
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
# Return command lambda for uchime_ref.
|
|
@@ -115,7 +115,7 @@ module BioDSL
|
|
|
115
115
|
|
|
116
116
|
if record[:SEQ]
|
|
117
117
|
@status[:sequences_in] += 1
|
|
118
|
-
@status[:residues_in]
|
|
118
|
+
@status[:residues_in] += record[:SEQ].length
|
|
119
119
|
seq_name = record[:SEQ_NAME] || i.to_s
|
|
120
120
|
|
|
121
121
|
entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
|
|
@@ -161,8 +161,8 @@ module BioDSL
|
|
|
161
161
|
|
|
162
162
|
output << record
|
|
163
163
|
@status[:sequences_out] += 1
|
|
164
|
-
@status[:residues_out]
|
|
165
|
-
@status[:records_out]
|
|
164
|
+
@status[:residues_out] += entry.length
|
|
165
|
+
@status[:records_out] += 1
|
|
166
166
|
end
|
|
167
167
|
end
|
|
168
168
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -232,9 +232,9 @@ module BioDSL
|
|
|
232
232
|
record.merge!(entry.to_bp)
|
|
233
233
|
|
|
234
234
|
output << record
|
|
235
|
-
@status[:records_out]
|
|
235
|
+
@status[:records_out] += 1
|
|
236
236
|
@status[:sequences_out] += 1
|
|
237
|
-
@status[:residues_out]
|
|
237
|
+
@status[:residues_out] += entry.length
|
|
238
238
|
end
|
|
239
239
|
end
|
|
240
240
|
end
|
|
@@ -273,9 +273,9 @@ module BioDSL
|
|
|
273
273
|
|
|
274
274
|
if (r = results[record[:SEQ_NAME]])
|
|
275
275
|
output << record.merge(r)
|
|
276
|
-
@status[:records_out]
|
|
276
|
+
@status[:records_out] += 1
|
|
277
277
|
@status[:sequences_out] += 1
|
|
278
|
-
@status[:residues_out]
|
|
278
|
+
@status[:residues_out] += record[:SEQ].length
|
|
279
279
|
else
|
|
280
280
|
fail BioDSL::UsearchError, 'Sequence name: ' \
|
|
281
281
|
"#{record[:SEQ_NAME]} not found in uclust results"
|