BioDSL 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
@@ -21,13 +21,11 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
28
28
|
module BioDSL
|
29
|
-
# rubocop:disable ClassLength
|
30
|
-
|
31
29
|
# == Assemble sequences the stream using IDBA_UD.
|
32
30
|
#
|
33
31
|
# +assemble_seq_idba+ is a wrapper around the prokaryotic metagenome
|
@@ -99,7 +97,7 @@ module BioDSL
|
|
99
97
|
TmpDir.create('reads.fna', 'contig.fa') do |fa_in, fa_out, tmp_dir|
|
100
98
|
process_input(input, output, fa_in)
|
101
99
|
execute_idba(fa_in, tmp_dir)
|
102
|
-
|
100
|
+
process_output(output, fa_out)
|
103
101
|
end
|
104
102
|
|
105
103
|
calc_n50(status)
|
@@ -123,7 +121,7 @@ module BioDSL
|
|
123
121
|
def defaults
|
124
122
|
@options[:kmer_min] ||= 24
|
125
123
|
@options[:kmer_max] ||= 48
|
126
|
-
@options[:cpus]
|
124
|
+
@options[:cpus] ||= 1
|
127
125
|
end
|
128
126
|
|
129
127
|
# Read all records from input and emit non-sequence records to the output
|
@@ -141,7 +139,7 @@ module BioDSL
|
|
141
139
|
entry = BioDSL::Seq.new_bp(record)
|
142
140
|
|
143
141
|
@status[:sequences_in] += 1
|
144
|
-
@status[:residues_in]
|
142
|
+
@status[:residues_in] += entry.length
|
145
143
|
|
146
144
|
fasta_io.puts entry.to_fasta
|
147
145
|
else
|
@@ -193,9 +191,9 @@ module BioDSL
|
|
193
191
|
BioDSL::Fasta.open(fa_out, 'r') do |ios|
|
194
192
|
ios.each do |entry|
|
195
193
|
output << entry.to_bp
|
196
|
-
@status[:records_out]
|
194
|
+
@status[:records_out] += 1
|
197
195
|
@status[:sequences_out] += 1
|
198
|
-
@status[:residues_out]
|
196
|
+
@status[:residues_out] += entry.length
|
199
197
|
|
200
198
|
@lengths << entry.length
|
201
199
|
end
|
@@ -212,7 +210,7 @@ module BioDSL
|
|
212
210
|
@lengths.reverse!
|
213
211
|
|
214
212
|
status[:contig_max] = @lengths.first || 0
|
215
|
-
status[:contig_min] = @lengths.last
|
213
|
+
status[:contig_min] = @lengths.last || 0
|
216
214
|
status[:contig_n50] = 0
|
217
215
|
|
218
216
|
count = 0
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -164,10 +164,10 @@ module BioDSL
|
|
164
164
|
|
165
165
|
# Set the default option values.
|
166
166
|
def defaults
|
167
|
-
@options[:kmer_min]
|
168
|
-
@options[:kmer_max]
|
167
|
+
@options[:kmer_min] ||= 21
|
168
|
+
@options[:kmer_max] ||= 49
|
169
169
|
@options[:contig_min] ||= 500
|
170
|
-
@options[:cpus]
|
170
|
+
@options[:cpus] ||= 1
|
171
171
|
end
|
172
172
|
|
173
173
|
# Read all records from input and emit non-sequence records to the output
|
@@ -185,7 +185,7 @@ module BioDSL
|
|
185
185
|
entry = BioDSL::Seq.new_bp(record)
|
186
186
|
|
187
187
|
@status[:sequences_in] += 1
|
188
|
-
@status[:residues_in]
|
188
|
+
@status[:residues_in] += entry.length
|
189
189
|
|
190
190
|
fasta_io.puts entry.to_fasta
|
191
191
|
else
|
@@ -314,11 +314,11 @@ module BioDSL
|
|
314
314
|
next if entry.length < @options[:contig_min]
|
315
315
|
|
316
316
|
lengths << entry.length
|
317
|
-
output
|
317
|
+
output << entry.to_bp
|
318
318
|
|
319
|
-
@status[:records_out]
|
319
|
+
@status[:records_out] += 1
|
320
320
|
@status[:sequences_out] += 1
|
321
|
-
@status[:residues_out]
|
321
|
+
@status[:residues_out] += entry.length
|
322
322
|
end
|
323
323
|
end
|
324
324
|
|
@@ -333,11 +333,11 @@ module BioDSL
|
|
333
333
|
@status[:kmer] = kmer
|
334
334
|
@status[:paired] = @paired
|
335
335
|
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
336
|
+
return if lengths.empty?
|
337
|
+
|
338
|
+
@status[:contig_min] = lengths.min
|
339
|
+
@status[:contig_max] = lengths.max
|
340
|
+
@status[:n50] = calc_n50(lengths)
|
341
341
|
end
|
342
342
|
|
343
343
|
N50 = Struct.new(:kmer, :n50)
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -160,7 +160,7 @@ module BioDSL
|
|
160
160
|
entry = BioDSL::Seq.new_bp(record)
|
161
161
|
|
162
162
|
@status[:sequences_in] += 1
|
163
|
-
@status[:residues_in]
|
163
|
+
@status[:residues_in] += entry.length
|
164
164
|
|
165
165
|
if entry.qual
|
166
166
|
@type = :fastq
|
@@ -216,9 +216,9 @@ module BioDSL
|
|
216
216
|
BioDSL::Fasta.open(output_file) do |ios|
|
217
217
|
ios.each do |entry|
|
218
218
|
output << entry.to_bp
|
219
|
-
@status[:records_out]
|
219
|
+
@status[:records_out] += 1
|
220
220
|
@status[:sequences_out] += 1
|
221
|
-
@status[:residues_out]
|
221
|
+
@status[:residues_out] += entry.length
|
222
222
|
|
223
223
|
@lengths << entry.length
|
224
224
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -185,12 +185,12 @@ module BioDSL
|
|
185
185
|
|
186
186
|
# Set default options.
|
187
187
|
def defaults
|
188
|
-
@options[:prefix]
|
188
|
+
@options[:prefix] ||= 'taxonomy'
|
189
189
|
@options[:kmer_size] ||= 8
|
190
190
|
@options[:step_size] ||= 1
|
191
|
-
@options[:hits_max]
|
191
|
+
@options[:hits_max] ||= 50
|
192
192
|
@options[:consensus] ||= 0.51
|
193
|
-
@options[:coverage]
|
193
|
+
@options[:coverage] ||= 0.9
|
194
194
|
@options[:best_only] = true if @options[:best_only].nil?
|
195
195
|
end
|
196
196
|
|
@@ -200,14 +200,14 @@ module BioDSL
|
|
200
200
|
# @param i [Fixnum] Record number,
|
201
201
|
# @param search [BioDSL::Taxonomy::Search] Search object.
|
202
202
|
def classify_seq(record, i, search)
|
203
|
-
@status[:sequences_in]
|
203
|
+
@status[:sequences_in] += 1
|
204
204
|
@status[:sequences_out] += 1
|
205
|
-
@status[:residues_in]
|
206
|
-
@status[:residues_out]
|
205
|
+
@status[:residues_in] += record[:SEQ].length
|
206
|
+
@status[:residues_out] += record[:SEQ].length
|
207
207
|
seq_name = record[:SEQ_NAME] || i.to_s
|
208
208
|
|
209
209
|
result = search.execute(BioDSL::Seq.new(seq_name: seq_name,
|
210
|
-
|
210
|
+
seq: record[:SEQ]))
|
211
211
|
|
212
212
|
record[:TAXONOMY] = result.taxonomy
|
213
213
|
record[:TAXONOMY_HITS] = result.hits
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -128,7 +128,7 @@ module BioDSL
|
|
128
128
|
# Set default options.
|
129
129
|
def defaults
|
130
130
|
@options[:confidence] ||= 80
|
131
|
-
@options[:cpus]
|
131
|
+
@options[:cpus] ||= 1
|
132
132
|
end
|
133
133
|
|
134
134
|
# Process input data and save sequences to a temporary file for
|
@@ -143,10 +143,10 @@ module BioDSL
|
|
143
143
|
@status[:records_in] += 1
|
144
144
|
|
145
145
|
if record[:SEQ]
|
146
|
-
@status[:sequences_in]
|
146
|
+
@status[:sequences_in] += 1
|
147
147
|
@status[:sequences_out] += 1
|
148
|
-
@status[:residues_in]
|
149
|
-
@status[:records_out]
|
148
|
+
@status[:residues_in] += record[:SEQ].length
|
149
|
+
@status[:records_out] += record[:SEQ].length
|
150
150
|
seq_name = record[:SEQ_NAME] || i.to_s
|
151
151
|
|
152
152
|
entry = BioDSL::Seq.new(seq_name: seq_name, seq: record[:SEQ])
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -166,16 +166,16 @@ module BioDSL
|
|
166
166
|
|
167
167
|
# Set default option values.
|
168
168
|
def defaults
|
169
|
-
@options[:mismatch_percent]
|
169
|
+
@options[:mismatch_percent] ||= 0
|
170
170
|
@options[:insertion_percent] ||= 0
|
171
|
-
@options[:deletion_percent]
|
171
|
+
@options[:deletion_percent] ||= 0
|
172
172
|
end
|
173
173
|
|
174
174
|
# Calculate the mismatch percentage.
|
175
175
|
#
|
176
176
|
# @return [Float] Mismatch percentage.
|
177
177
|
def calc_mis
|
178
|
-
(@primer.length * @options[:mismatch_percent]
|
178
|
+
(@primer.length * @options[:mismatch_percent] * 0.01).round
|
179
179
|
end
|
180
180
|
|
181
181
|
# Calculate the insertion percentage.
|
@@ -189,7 +189,7 @@ module BioDSL
|
|
189
189
|
#
|
190
190
|
# @return [Float] Deletion percentage.
|
191
191
|
def calc_del
|
192
|
-
(@primer.length * @options[:deletion_percent]
|
192
|
+
(@primer.length * @options[:deletion_percent] * 0.01).round
|
193
193
|
end
|
194
194
|
|
195
195
|
# Reset any previous clip_primer results from record.
|
@@ -207,7 +207,7 @@ module BioDSL
|
|
207
207
|
entry = BioDSL::Seq.new_bp(record)
|
208
208
|
|
209
209
|
@status[:sequences_in] += 1
|
210
|
-
@status[:residues_in]
|
210
|
+
@status[:residues_in] += entry.length
|
211
211
|
|
212
212
|
case @options[:direction]
|
213
213
|
when :forward then clip_primer_forward(record, entry)
|
@@ -217,7 +217,7 @@ module BioDSL
|
|
217
217
|
end
|
218
218
|
|
219
219
|
@status[:sequences_out] += 1
|
220
|
-
@status[:residues_out]
|
220
|
+
@status[:residues_out] += entry.length
|
221
221
|
end
|
222
222
|
|
223
223
|
# Clip forward primer from entry and save clip information
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -89,8 +89,8 @@ module BioDSL
|
|
89
89
|
process_input(input, output, tmp_in)
|
90
90
|
|
91
91
|
BioDSL::Usearch.cluster_otus(input: tmp_in, output: tmp_out,
|
92
|
-
|
93
|
-
|
92
|
+
identity: @options[:identity],
|
93
|
+
verbose: @options[:verbose])
|
94
94
|
|
95
95
|
process_output(output, tmp_out)
|
96
96
|
end
|
@@ -172,8 +172,8 @@ module BioDSL
|
|
172
172
|
|
173
173
|
output << record
|
174
174
|
@status[:sequences_out] += 1
|
175
|
-
@status[:residues_out]
|
176
|
-
@status[:records_out]
|
175
|
+
@status[:residues_out] += record[:SEQ].length
|
176
|
+
@status[:records_out] += 1
|
177
177
|
end
|
178
178
|
end
|
179
179
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -162,7 +162,7 @@ module BioDSL
|
|
162
162
|
def write_tax(hash, output)
|
163
163
|
hash.each_value do |record|
|
164
164
|
output << record
|
165
|
-
@status[:otus_out]
|
165
|
+
@status[:otus_out] += 1
|
166
166
|
@status[:records_out] += 1
|
167
167
|
end
|
168
168
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -142,7 +142,7 @@ module BioDSL
|
|
142
142
|
|
143
143
|
output << record
|
144
144
|
|
145
|
-
@status[:hits_out]
|
145
|
+
@status[:hits_out] += 1
|
146
146
|
@status[:records_out] += 1
|
147
147
|
end
|
148
148
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -106,10 +106,10 @@ module BioDSL
|
|
106
106
|
entry.type = @type
|
107
107
|
entry.complement!
|
108
108
|
|
109
|
-
@status[:sequences_in]
|
109
|
+
@status[:sequences_in] += 1
|
110
110
|
@status[:sequences_out] += 1
|
111
|
-
@status[:residues_in]
|
112
|
-
@status[:residues_out]
|
111
|
+
@status[:residues_in] += entry.length
|
112
|
+
@status[:residues_out] += entry.length
|
113
113
|
|
114
114
|
record.merge! entry.to_bp
|
115
115
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -74,7 +74,7 @@ module BioDSL
|
|
74
74
|
#
|
75
75
|
# @return [CountValues] Instance of class.
|
76
76
|
def initialize(options)
|
77
|
-
@options
|
77
|
+
@options = options
|
78
78
|
|
79
79
|
check_options
|
80
80
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -68,7 +68,6 @@ module BioDSL
|
|
68
68
|
# {:SEQ_NAME=>"test1", :SEQ=>"A-GTC", :SEQ_LEN=>5}
|
69
69
|
# {:SEQ_NAME=>"test2", :SEQ=>"AGGTC", :SEQ_LEN=>5}
|
70
70
|
#
|
71
|
-
# rubocop:disable ClassLength
|
72
71
|
class DegapSeq
|
73
72
|
require 'narray'
|
74
73
|
|
@@ -157,14 +156,14 @@ module BioDSL
|
|
157
156
|
# @param seq [String] Sequences.
|
158
157
|
def mask_add(seq)
|
159
158
|
@status[:sequences_in] += 1
|
160
|
-
@status[:residues_in]
|
159
|
+
@status[:residues_in] += seq.length
|
161
160
|
|
162
161
|
@max_len ||= seq.length
|
163
162
|
|
164
163
|
check_length(seq)
|
165
164
|
|
166
165
|
@na_mask ||= NArray.int(seq.length)
|
167
|
-
na_seq
|
166
|
+
na_seq = NArray.to_na(seq, 'byte')
|
168
167
|
@indels.each_char { |c| @na_mask += na_seq.eq(c.ord) }
|
169
168
|
end
|
170
169
|
|
@@ -212,7 +211,7 @@ module BioDSL
|
|
212
211
|
record[:SEQ_LEN] = record[:SEQ].length
|
213
212
|
|
214
213
|
@status[:sequences_out] += 1
|
215
|
-
@status[:residues_out]
|
214
|
+
@status[:residues_out] += record[:SEQ].length
|
216
215
|
end
|
217
216
|
|
218
217
|
# Remove all gaps from all sequences in input stream and output to output
|
@@ -240,12 +239,12 @@ module BioDSL
|
|
240
239
|
entry = BioDSL::Seq.new_bp(record)
|
241
240
|
|
242
241
|
@status[:sequences_in] += 1
|
243
|
-
@status[:residues_in]
|
242
|
+
@status[:residues_in] += entry.length
|
244
243
|
|
245
244
|
entry.seq.delete!(@indels)
|
246
245
|
|
247
246
|
@status[:sequences_out] += 1
|
248
|
-
@status[:residues_out]
|
247
|
+
@status[:residues_out] += entry.length
|
249
248
|
|
250
249
|
record.merge! entry.to_bp
|
251
250
|
end
|