BioDSL 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
data/lib/BioDSL/commands/dump.rb
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -63,7 +63,7 @@ module BioDSL
|
|
|
63
63
|
#
|
|
64
64
|
# @return [Dump] Returns an instance of the Dump class.
|
|
65
65
|
def initialize(options)
|
|
66
|
-
@options
|
|
66
|
+
@options = options
|
|
67
67
|
|
|
68
68
|
check_options
|
|
69
69
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -197,7 +197,7 @@ module BioDSL
|
|
|
197
197
|
def record2entry(record, i)
|
|
198
198
|
entry = BioDSL::Seq.new(seq_name: i, seq: record[:SEQ])
|
|
199
199
|
@status[:sequences_in] += 1
|
|
200
|
-
@status[:residues_in]
|
|
200
|
+
@status[:residues_in] += entry.length
|
|
201
201
|
entry
|
|
202
202
|
end
|
|
203
203
|
|
|
@@ -226,9 +226,9 @@ module BioDSL
|
|
|
226
226
|
if record.key? :SEQ
|
|
227
227
|
unless @filter.include? i
|
|
228
228
|
output << record
|
|
229
|
-
@status[:records_out]
|
|
229
|
+
@status[:records_out] += 1
|
|
230
230
|
@status[:sequences_out] += 1
|
|
231
|
-
@status[:residues_out]
|
|
231
|
+
@status[:residues_out] += record[:SEQ].length
|
|
232
232
|
end
|
|
233
233
|
else
|
|
234
234
|
output << record
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -160,7 +160,7 @@ module BioDSL
|
|
|
160
160
|
|
|
161
161
|
# Set the default option values.
|
|
162
162
|
def defaults
|
|
163
|
-
@options[:type]
|
|
163
|
+
@options[:type] ||= :dna
|
|
164
164
|
@options[:procedure] ||= :single
|
|
165
165
|
end
|
|
166
166
|
|
|
@@ -179,10 +179,10 @@ module BioDSL
|
|
|
179
179
|
entry = BioDSL::Seq.new(seq_name: i, seq: record[:SEQ])
|
|
180
180
|
@names[i] = record[:SEQ_NAME] || i
|
|
181
181
|
|
|
182
|
-
@status[:sequences_in]
|
|
182
|
+
@status[:sequences_in] += 1
|
|
183
183
|
@status[:sequences_out] += 1
|
|
184
|
-
@status[:residues_in]
|
|
185
|
-
@status[:residues_out]
|
|
184
|
+
@status[:residues_in] += entry.length
|
|
185
|
+
@status[:residues_out] += entry.length
|
|
186
186
|
|
|
187
187
|
fasta_io.puts entry.to_fasta
|
|
188
188
|
end
|
|
@@ -205,9 +205,9 @@ module BioDSL
|
|
|
205
205
|
ios.each do |entry|
|
|
206
206
|
output << parse_entry(entry)
|
|
207
207
|
|
|
208
|
-
@status[:records_out]
|
|
208
|
+
@status[:records_out] += 1
|
|
209
209
|
@status[:sequences_out] += 1
|
|
210
|
-
@status[:residues_out]
|
|
210
|
+
@status[:residues_out] += entry.length
|
|
211
211
|
end
|
|
212
212
|
end
|
|
213
213
|
end
|
data/lib/BioDSL/commands/grab.rb
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -139,7 +139,7 @@ module BioDSL
|
|
|
139
139
|
create_output_dir
|
|
140
140
|
check_output_files
|
|
141
141
|
|
|
142
|
-
@index
|
|
142
|
+
@index = BioDSL::Taxonomy::Index.new(options)
|
|
143
143
|
end
|
|
144
144
|
|
|
145
145
|
# Return command lambda for index_taxonomy.
|
|
@@ -179,7 +179,7 @@ module BioDSL
|
|
|
179
179
|
|
|
180
180
|
# Set the default options hash values.
|
|
181
181
|
def defaults
|
|
182
|
-
@options[:prefix]
|
|
182
|
+
@options[:prefix] ||= 'taxonomy'
|
|
183
183
|
@options[:kmer_size] ||= 8
|
|
184
184
|
@options[:step_size] ||= 1
|
|
185
185
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -136,7 +136,7 @@ module BioDSL
|
|
|
136
136
|
# Set default options.
|
|
137
137
|
def defaults
|
|
138
138
|
@options[:quality_min] ||= 20
|
|
139
|
-
@options[:mask]
|
|
139
|
+
@options[:mask] ||= :soft
|
|
140
140
|
end
|
|
141
141
|
|
|
142
142
|
# Mask sequence in given record.
|
|
@@ -146,12 +146,12 @@ module BioDSL
|
|
|
146
146
|
entry = BioDSL::Seq.new_bp(record)
|
|
147
147
|
|
|
148
148
|
@status[:sequences_in] += 1
|
|
149
|
-
@status[:residues_in]
|
|
149
|
+
@status[:residues_in] += entry.length
|
|
150
150
|
|
|
151
151
|
@mask == :soft ? mask_seq_soft(entry) : mask_seq_hard(entry)
|
|
152
152
|
|
|
153
153
|
@status[:sequences_out] += 1
|
|
154
|
-
@status[:residues_out]
|
|
154
|
+
@status[:residues_out] += entry.length
|
|
155
155
|
|
|
156
156
|
record.merge! entry.to_bp
|
|
157
157
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -159,7 +159,7 @@ module BioDSL
|
|
|
159
159
|
record[:SCORES_MEAN] = mean
|
|
160
160
|
end
|
|
161
161
|
|
|
162
|
-
@sum
|
|
162
|
+
@sum += mean
|
|
163
163
|
@status[:min_mean] = mean if mean < @status[:min_mean]
|
|
164
164
|
@status[:max_mean] = mean if mean > @status[:max_mean]
|
|
165
165
|
@count += 1
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -122,9 +122,9 @@ module BioDSL
|
|
|
122
122
|
if record1[:SEQ] && record2[:SEQ]
|
|
123
123
|
output << merge_pair_seq(record1, record2)
|
|
124
124
|
|
|
125
|
-
@status[:sequences_in]
|
|
125
|
+
@status[:sequences_in] += 2
|
|
126
126
|
@status[:sequences_out] += 1
|
|
127
|
-
@status[:records_out]
|
|
127
|
+
@status[:records_out] += 1
|
|
128
128
|
else
|
|
129
129
|
output.puts record1, record2
|
|
130
130
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -66,7 +66,6 @@ module BioDSL
|
|
|
66
66
|
#
|
|
67
67
|
# BD.new.read_table(input: "test.tab").plot_heatmap.run
|
|
68
68
|
#
|
|
69
|
-
# rubocop:disable ClassLength
|
|
70
69
|
class PlotHeatmap
|
|
71
70
|
require 'gnuplotter'
|
|
72
71
|
require 'set'
|
|
@@ -133,9 +132,9 @@ module BioDSL
|
|
|
133
132
|
# Set default options.
|
|
134
133
|
def defaults
|
|
135
134
|
@options[:terminal] ||= :dumb
|
|
136
|
-
@options[:title]
|
|
137
|
-
@options[:xlabel]
|
|
138
|
-
@options[:ylabel]
|
|
135
|
+
@options[:title] ||= 'Heatmap'
|
|
136
|
+
@options[:xlabel] ||= 'x'
|
|
137
|
+
@options[:ylabel] ||= 'y'
|
|
139
138
|
end
|
|
140
139
|
|
|
141
140
|
# Compile a set of keys to skip.
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -167,9 +167,9 @@ module BioDSL
|
|
|
167
167
|
# Set default values for options hash.
|
|
168
168
|
def defaults
|
|
169
169
|
@options[:terminal] ||= :dumb
|
|
170
|
-
@options[:title]
|
|
171
|
-
@options[:xlabel]
|
|
172
|
-
@options[:ylabel]
|
|
170
|
+
@options[:title] ||= 'Histogram'
|
|
171
|
+
@options[:xlabel] ||= @options[:key]
|
|
172
|
+
@options[:ylabel] ||= 'n'
|
|
173
173
|
|
|
174
174
|
@options[:ylogscale] &&
|
|
175
175
|
@options[:ylabel] = "log10(#{@options[:ylabel]})"
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -180,10 +180,10 @@ module BioDSL
|
|
|
180
180
|
# Set default options.
|
|
181
181
|
def defaults
|
|
182
182
|
@options[:direction] ||= :both
|
|
183
|
-
@options[:terminal]
|
|
184
|
-
@options[:title]
|
|
185
|
-
@options[:xlabel]
|
|
186
|
-
@options[:ylabel]
|
|
183
|
+
@options[:terminal] ||= :dumb
|
|
184
|
+
@options[:title] ||= 'Matches'
|
|
185
|
+
@options[:xlabel] ||= 'x'
|
|
186
|
+
@options[:ylabel] ||= 'y'
|
|
187
187
|
end
|
|
188
188
|
|
|
189
189
|
# Set plot default attributes.
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -147,9 +147,9 @@ module BioDSL
|
|
|
147
147
|
# Set default options.
|
|
148
148
|
def defaults
|
|
149
149
|
@options[:terminal] ||= :dumb
|
|
150
|
-
@options[:title]
|
|
151
|
-
@options[:xlabel]
|
|
152
|
-
@options[:ylabel]
|
|
150
|
+
@options[:title] ||= 'Residue Distribution'
|
|
151
|
+
@options[:xlabel] ||= 'Sequence position'
|
|
152
|
+
@options[:ylabel] ||= '%'
|
|
153
153
|
end
|
|
154
154
|
|
|
155
155
|
# Given a record with a sequence count its residues.
|
|
@@ -157,12 +157,12 @@ module BioDSL
|
|
|
157
157
|
# @param record [Hash] BioDSL record
|
|
158
158
|
def count_residues(record)
|
|
159
159
|
@status[:sequences_in] += 1
|
|
160
|
-
@status[:residues_in]
|
|
160
|
+
@status[:residues_in] += record[:SEQ].length
|
|
161
161
|
|
|
162
162
|
record[:SEQ].upcase.chars.each_with_index do |char, i|
|
|
163
163
|
c = char.to_sym
|
|
164
164
|
@counts[i][c] += 1
|
|
165
|
-
@total[i]
|
|
165
|
+
@total[i] += 1
|
|
166
166
|
@residues.add(c)
|
|
167
167
|
end
|
|
168
168
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -113,7 +113,7 @@ module BioDSL
|
|
|
113
113
|
STATS = %i(records_in records_out sequences_in sequences_out residues_in
|
|
114
114
|
residues_out)
|
|
115
115
|
|
|
116
|
-
SCORES_MAX = 100_000
|
|
116
|
+
SCORES_MAX = 100_000 # Maximum score string length.
|
|
117
117
|
|
|
118
118
|
# Constructor for PlotScores.
|
|
119
119
|
#
|
|
@@ -180,9 +180,9 @@ module BioDSL
|
|
|
180
180
|
# Set default options.
|
|
181
181
|
def default
|
|
182
182
|
@options[:terminal] ||= :dumb
|
|
183
|
-
@options[:title]
|
|
184
|
-
@options[:xlabel]
|
|
185
|
-
@options[:ylabel]
|
|
183
|
+
@options[:title] ||= 'Mean Quality Scores'
|
|
184
|
+
@options[:xlabel] ||= 'Sequence Position'
|
|
185
|
+
@options[:ylabel] ||= 'Mean Score'
|
|
186
186
|
end
|
|
187
187
|
|
|
188
188
|
# Collect plot data from a given record.
|
|
@@ -196,7 +196,7 @@ module BioDSL
|
|
|
196
196
|
|
|
197
197
|
score_vec = NArray.to_na(scores, 'byte') - Seq::SCORE_BASE
|
|
198
198
|
@scores_vec[0...scores.length] += score_vec
|
|
199
|
-
@count_vec[0...scores.length]
|
|
199
|
+
@count_vec[0...scores.length] += 1
|
|
200
200
|
|
|
201
201
|
@max = scores.length if scores.length > @max
|
|
202
202
|
end
|
|
@@ -212,7 +212,7 @@ module BioDSL
|
|
|
212
212
|
|
|
213
213
|
# Prepare data to plot.
|
|
214
214
|
def prepare_plot_data
|
|
215
|
-
@max = 1 if @max == 0
|
|
215
|
+
@max = 1 if @max == 0 # ugly fix to avoid index error
|
|
216
216
|
|
|
217
217
|
count_vec = @count_vec[0...@max].to_f
|
|
218
218
|
count_vec *= (Seq::SCORE_MAX / @count_vec.max(0).to_f)
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -142,11 +142,11 @@ module BioDSL
|
|
|
142
142
|
|
|
143
143
|
input.each do |record|
|
|
144
144
|
output << record
|
|
145
|
-
@status[:records_in]
|
|
145
|
+
@status[:records_in] += 1
|
|
146
146
|
|
|
147
147
|
if record[:SEQ]
|
|
148
148
|
@status[:sequences_in] += 1
|
|
149
|
-
@status[:residues_in]
|
|
149
|
+
@status[:residues_in] += record[:SEQ].length
|
|
150
150
|
end
|
|
151
151
|
end
|
|
152
152
|
end
|
|
@@ -165,9 +165,9 @@ module BioDSL
|
|
|
165
165
|
break if @count == first
|
|
166
166
|
output << entry.to_bp
|
|
167
167
|
|
|
168
|
-
@status[:records_out]
|
|
168
|
+
@status[:records_out] += 1
|
|
169
169
|
@status[:sequences_out] += 1
|
|
170
|
-
@status[:residues_out]
|
|
170
|
+
@status[:residues_out] += entry.length
|
|
171
171
|
|
|
172
172
|
@count += 1
|
|
173
173
|
end
|
|
@@ -200,9 +200,9 @@ module BioDSL
|
|
|
200
200
|
input.each do |entry|
|
|
201
201
|
output << entry.to_bp
|
|
202
202
|
|
|
203
|
-
@status[:records_out]
|
|
203
|
+
@status[:records_out] += 1
|
|
204
204
|
@status[:sequences_out] += 1
|
|
205
|
-
@status[:residues_out]
|
|
205
|
+
@status[:residues_out] += entry.length
|
|
206
206
|
end
|
|
207
207
|
end
|
|
208
208
|
|
|
@@ -213,9 +213,9 @@ module BioDSL
|
|
|
213
213
|
@buffer.each do |entry|
|
|
214
214
|
output << entry.to_bp
|
|
215
215
|
|
|
216
|
-
@status[:records_out]
|
|
216
|
+
@status[:records_out] += 1
|
|
217
217
|
@status[:sequences_out] += 1
|
|
218
|
-
@status[:residues_out]
|
|
218
|
+
@status[:residues_out] += entry.length
|
|
219
219
|
end
|
|
220
220
|
end
|
|
221
221
|
end
|