BioDSL 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
data/lib/BioDSL/commands/dump.rb
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -63,7 +63,7 @@ module BioDSL
|
|
63
63
|
#
|
64
64
|
# @return [Dump] Returns an instance of the Dump class.
|
65
65
|
def initialize(options)
|
66
|
-
@options
|
66
|
+
@options = options
|
67
67
|
|
68
68
|
check_options
|
69
69
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -197,7 +197,7 @@ module BioDSL
|
|
197
197
|
def record2entry(record, i)
|
198
198
|
entry = BioDSL::Seq.new(seq_name: i, seq: record[:SEQ])
|
199
199
|
@status[:sequences_in] += 1
|
200
|
-
@status[:residues_in]
|
200
|
+
@status[:residues_in] += entry.length
|
201
201
|
entry
|
202
202
|
end
|
203
203
|
|
@@ -226,9 +226,9 @@ module BioDSL
|
|
226
226
|
if record.key? :SEQ
|
227
227
|
unless @filter.include? i
|
228
228
|
output << record
|
229
|
-
@status[:records_out]
|
229
|
+
@status[:records_out] += 1
|
230
230
|
@status[:sequences_out] += 1
|
231
|
-
@status[:residues_out]
|
231
|
+
@status[:residues_out] += record[:SEQ].length
|
232
232
|
end
|
233
233
|
else
|
234
234
|
output << record
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -160,7 +160,7 @@ module BioDSL
|
|
160
160
|
|
161
161
|
# Set the default option values.
|
162
162
|
def defaults
|
163
|
-
@options[:type]
|
163
|
+
@options[:type] ||= :dna
|
164
164
|
@options[:procedure] ||= :single
|
165
165
|
end
|
166
166
|
|
@@ -179,10 +179,10 @@ module BioDSL
|
|
179
179
|
entry = BioDSL::Seq.new(seq_name: i, seq: record[:SEQ])
|
180
180
|
@names[i] = record[:SEQ_NAME] || i
|
181
181
|
|
182
|
-
@status[:sequences_in]
|
182
|
+
@status[:sequences_in] += 1
|
183
183
|
@status[:sequences_out] += 1
|
184
|
-
@status[:residues_in]
|
185
|
-
@status[:residues_out]
|
184
|
+
@status[:residues_in] += entry.length
|
185
|
+
@status[:residues_out] += entry.length
|
186
186
|
|
187
187
|
fasta_io.puts entry.to_fasta
|
188
188
|
end
|
@@ -205,9 +205,9 @@ module BioDSL
|
|
205
205
|
ios.each do |entry|
|
206
206
|
output << parse_entry(entry)
|
207
207
|
|
208
|
-
@status[:records_out]
|
208
|
+
@status[:records_out] += 1
|
209
209
|
@status[:sequences_out] += 1
|
210
|
-
@status[:residues_out]
|
210
|
+
@status[:residues_out] += entry.length
|
211
211
|
end
|
212
212
|
end
|
213
213
|
end
|
data/lib/BioDSL/commands/grab.rb
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -139,7 +139,7 @@ module BioDSL
|
|
139
139
|
create_output_dir
|
140
140
|
check_output_files
|
141
141
|
|
142
|
-
@index
|
142
|
+
@index = BioDSL::Taxonomy::Index.new(options)
|
143
143
|
end
|
144
144
|
|
145
145
|
# Return command lambda for index_taxonomy.
|
@@ -179,7 +179,7 @@ module BioDSL
|
|
179
179
|
|
180
180
|
# Set the default options hash values.
|
181
181
|
def defaults
|
182
|
-
@options[:prefix]
|
182
|
+
@options[:prefix] ||= 'taxonomy'
|
183
183
|
@options[:kmer_size] ||= 8
|
184
184
|
@options[:step_size] ||= 1
|
185
185
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -136,7 +136,7 @@ module BioDSL
|
|
136
136
|
# Set default options.
|
137
137
|
def defaults
|
138
138
|
@options[:quality_min] ||= 20
|
139
|
-
@options[:mask]
|
139
|
+
@options[:mask] ||= :soft
|
140
140
|
end
|
141
141
|
|
142
142
|
# Mask sequence in given record.
|
@@ -146,12 +146,12 @@ module BioDSL
|
|
146
146
|
entry = BioDSL::Seq.new_bp(record)
|
147
147
|
|
148
148
|
@status[:sequences_in] += 1
|
149
|
-
@status[:residues_in]
|
149
|
+
@status[:residues_in] += entry.length
|
150
150
|
|
151
151
|
@mask == :soft ? mask_seq_soft(entry) : mask_seq_hard(entry)
|
152
152
|
|
153
153
|
@status[:sequences_out] += 1
|
154
|
-
@status[:residues_out]
|
154
|
+
@status[:residues_out] += entry.length
|
155
155
|
|
156
156
|
record.merge! entry.to_bp
|
157
157
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -159,7 +159,7 @@ module BioDSL
|
|
159
159
|
record[:SCORES_MEAN] = mean
|
160
160
|
end
|
161
161
|
|
162
|
-
@sum
|
162
|
+
@sum += mean
|
163
163
|
@status[:min_mean] = mean if mean < @status[:min_mean]
|
164
164
|
@status[:max_mean] = mean if mean > @status[:max_mean]
|
165
165
|
@count += 1
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -122,9 +122,9 @@ module BioDSL
|
|
122
122
|
if record1[:SEQ] && record2[:SEQ]
|
123
123
|
output << merge_pair_seq(record1, record2)
|
124
124
|
|
125
|
-
@status[:sequences_in]
|
125
|
+
@status[:sequences_in] += 2
|
126
126
|
@status[:sequences_out] += 1
|
127
|
-
@status[:records_out]
|
127
|
+
@status[:records_out] += 1
|
128
128
|
else
|
129
129
|
output.puts record1, record2
|
130
130
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -66,7 +66,6 @@ module BioDSL
|
|
66
66
|
#
|
67
67
|
# BD.new.read_table(input: "test.tab").plot_heatmap.run
|
68
68
|
#
|
69
|
-
# rubocop:disable ClassLength
|
70
69
|
class PlotHeatmap
|
71
70
|
require 'gnuplotter'
|
72
71
|
require 'set'
|
@@ -133,9 +132,9 @@ module BioDSL
|
|
133
132
|
# Set default options.
|
134
133
|
def defaults
|
135
134
|
@options[:terminal] ||= :dumb
|
136
|
-
@options[:title]
|
137
|
-
@options[:xlabel]
|
138
|
-
@options[:ylabel]
|
135
|
+
@options[:title] ||= 'Heatmap'
|
136
|
+
@options[:xlabel] ||= 'x'
|
137
|
+
@options[:ylabel] ||= 'y'
|
139
138
|
end
|
140
139
|
|
141
140
|
# Compile a set of keys to skip.
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -167,9 +167,9 @@ module BioDSL
|
|
167
167
|
# Set default values for options hash.
|
168
168
|
def defaults
|
169
169
|
@options[:terminal] ||= :dumb
|
170
|
-
@options[:title]
|
171
|
-
@options[:xlabel]
|
172
|
-
@options[:ylabel]
|
170
|
+
@options[:title] ||= 'Histogram'
|
171
|
+
@options[:xlabel] ||= @options[:key]
|
172
|
+
@options[:ylabel] ||= 'n'
|
173
173
|
|
174
174
|
@options[:ylogscale] &&
|
175
175
|
@options[:ylabel] = "log10(#{@options[:ylabel]})"
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -180,10 +180,10 @@ module BioDSL
|
|
180
180
|
# Set default options.
|
181
181
|
def defaults
|
182
182
|
@options[:direction] ||= :both
|
183
|
-
@options[:terminal]
|
184
|
-
@options[:title]
|
185
|
-
@options[:xlabel]
|
186
|
-
@options[:ylabel]
|
183
|
+
@options[:terminal] ||= :dumb
|
184
|
+
@options[:title] ||= 'Matches'
|
185
|
+
@options[:xlabel] ||= 'x'
|
186
|
+
@options[:ylabel] ||= 'y'
|
187
187
|
end
|
188
188
|
|
189
189
|
# Set plot default attributes.
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -147,9 +147,9 @@ module BioDSL
|
|
147
147
|
# Set default options.
|
148
148
|
def defaults
|
149
149
|
@options[:terminal] ||= :dumb
|
150
|
-
@options[:title]
|
151
|
-
@options[:xlabel]
|
152
|
-
@options[:ylabel]
|
150
|
+
@options[:title] ||= 'Residue Distribution'
|
151
|
+
@options[:xlabel] ||= 'Sequence position'
|
152
|
+
@options[:ylabel] ||= '%'
|
153
153
|
end
|
154
154
|
|
155
155
|
# Given a record with a sequence count its residues.
|
@@ -157,12 +157,12 @@ module BioDSL
|
|
157
157
|
# @param record [Hash] BioDSL record
|
158
158
|
def count_residues(record)
|
159
159
|
@status[:sequences_in] += 1
|
160
|
-
@status[:residues_in]
|
160
|
+
@status[:residues_in] += record[:SEQ].length
|
161
161
|
|
162
162
|
record[:SEQ].upcase.chars.each_with_index do |char, i|
|
163
163
|
c = char.to_sym
|
164
164
|
@counts[i][c] += 1
|
165
|
-
@total[i]
|
165
|
+
@total[i] += 1
|
166
166
|
@residues.add(c)
|
167
167
|
end
|
168
168
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -113,7 +113,7 @@ module BioDSL
|
|
113
113
|
STATS = %i(records_in records_out sequences_in sequences_out residues_in
|
114
114
|
residues_out)
|
115
115
|
|
116
|
-
SCORES_MAX = 100_000
|
116
|
+
SCORES_MAX = 100_000 # Maximum score string length.
|
117
117
|
|
118
118
|
# Constructor for PlotScores.
|
119
119
|
#
|
@@ -180,9 +180,9 @@ module BioDSL
|
|
180
180
|
# Set default options.
|
181
181
|
def default
|
182
182
|
@options[:terminal] ||= :dumb
|
183
|
-
@options[:title]
|
184
|
-
@options[:xlabel]
|
185
|
-
@options[:ylabel]
|
183
|
+
@options[:title] ||= 'Mean Quality Scores'
|
184
|
+
@options[:xlabel] ||= 'Sequence Position'
|
185
|
+
@options[:ylabel] ||= 'Mean Score'
|
186
186
|
end
|
187
187
|
|
188
188
|
# Collect plot data from a given record.
|
@@ -196,7 +196,7 @@ module BioDSL
|
|
196
196
|
|
197
197
|
score_vec = NArray.to_na(scores, 'byte') - Seq::SCORE_BASE
|
198
198
|
@scores_vec[0...scores.length] += score_vec
|
199
|
-
@count_vec[0...scores.length]
|
199
|
+
@count_vec[0...scores.length] += 1
|
200
200
|
|
201
201
|
@max = scores.length if scores.length > @max
|
202
202
|
end
|
@@ -212,7 +212,7 @@ module BioDSL
|
|
212
212
|
|
213
213
|
# Prepare data to plot.
|
214
214
|
def prepare_plot_data
|
215
|
-
@max = 1 if @max == 0
|
215
|
+
@max = 1 if @max == 0 # ugly fix to avoid index error
|
216
216
|
|
217
217
|
count_vec = @count_vec[0...@max].to_f
|
218
218
|
count_vec *= (Seq::SCORE_MAX / @count_vec.max(0).to_f)
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
# #
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
23
23
|
# #
|
24
|
-
# This software is part of the BioDSL
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
25
25
|
# #
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
27
27
|
|
@@ -142,11 +142,11 @@ module BioDSL
|
|
142
142
|
|
143
143
|
input.each do |record|
|
144
144
|
output << record
|
145
|
-
@status[:records_in]
|
145
|
+
@status[:records_in] += 1
|
146
146
|
|
147
147
|
if record[:SEQ]
|
148
148
|
@status[:sequences_in] += 1
|
149
|
-
@status[:residues_in]
|
149
|
+
@status[:residues_in] += record[:SEQ].length
|
150
150
|
end
|
151
151
|
end
|
152
152
|
end
|
@@ -165,9 +165,9 @@ module BioDSL
|
|
165
165
|
break if @count == first
|
166
166
|
output << entry.to_bp
|
167
167
|
|
168
|
-
@status[:records_out]
|
168
|
+
@status[:records_out] += 1
|
169
169
|
@status[:sequences_out] += 1
|
170
|
-
@status[:residues_out]
|
170
|
+
@status[:residues_out] += entry.length
|
171
171
|
|
172
172
|
@count += 1
|
173
173
|
end
|
@@ -200,9 +200,9 @@ module BioDSL
|
|
200
200
|
input.each do |entry|
|
201
201
|
output << entry.to_bp
|
202
202
|
|
203
|
-
@status[:records_out]
|
203
|
+
@status[:records_out] += 1
|
204
204
|
@status[:sequences_out] += 1
|
205
|
-
@status[:residues_out]
|
205
|
+
@status[:residues_out] += entry.length
|
206
206
|
end
|
207
207
|
end
|
208
208
|
|
@@ -213,9 +213,9 @@ module BioDSL
|
|
213
213
|
@buffer.each do |entry|
|
214
214
|
output << entry.to_bp
|
215
215
|
|
216
|
-
@status[:records_out]
|
216
|
+
@status[:records_out] += 1
|
217
217
|
@status[:sequences_out] += 1
|
218
|
-
@status[:residues_out]
|
218
|
+
@status[:residues_out] += entry.length
|
219
219
|
end
|
220
220
|
end
|
221
221
|
end
|