BioDSL 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/BioDSL.gemspec +1 -1
- data/Gemfile +6 -0
- data/README.md +289 -155
- data/Rakefile +18 -16
- data/lib/BioDSL.rb +1 -1
- data/lib/BioDSL/cary.rb +78 -53
- data/lib/BioDSL/command.rb +2 -2
- data/lib/BioDSL/commands.rb +1 -1
- data/lib/BioDSL/commands/add_key.rb +1 -1
- data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
- data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
- data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
- data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
- data/lib/BioDSL/commands/classify_seq.rb +8 -8
- data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
- data/lib/BioDSL/commands/clip_primer.rb +7 -7
- data/lib/BioDSL/commands/cluster_otus.rb +5 -5
- data/lib/BioDSL/commands/collapse_otus.rb +2 -2
- data/lib/BioDSL/commands/collect_otus.rb +2 -2
- data/lib/BioDSL/commands/complement_seq.rb +4 -4
- data/lib/BioDSL/commands/count.rb +1 -1
- data/lib/BioDSL/commands/count_values.rb +2 -2
- data/lib/BioDSL/commands/degap_seq.rb +6 -7
- data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
- data/lib/BioDSL/commands/dump.rb +2 -2
- data/lib/BioDSL/commands/filter_rrna.rb +4 -4
- data/lib/BioDSL/commands/genecall.rb +7 -7
- data/lib/BioDSL/commands/grab.rb +1 -1
- data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
- data/lib/BioDSL/commands/mask_seq.rb +4 -4
- data/lib/BioDSL/commands/mean_scores.rb +2 -2
- data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
- data/lib/BioDSL/commands/merge_table.rb +1 -1
- data/lib/BioDSL/commands/merge_values.rb +1 -1
- data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
- data/lib/BioDSL/commands/plot_histogram.rb +4 -4
- data/lib/BioDSL/commands/plot_matches.rb +5 -5
- data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
- data/lib/BioDSL/commands/plot_scores.rb +7 -7
- data/lib/BioDSL/commands/random.rb +1 -1
- data/lib/BioDSL/commands/read_fasta.rb +9 -9
- data/lib/BioDSL/commands/read_fastq.rb +16 -16
- data/lib/BioDSL/commands/read_table.rb +2 -3
- data/lib/BioDSL/commands/reverse_seq.rb +4 -4
- data/lib/BioDSL/commands/slice_align.rb +4 -4
- data/lib/BioDSL/commands/slice_seq.rb +3 -3
- data/lib/BioDSL/commands/sort.rb +1 -1
- data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
- data/lib/BioDSL/commands/split_values.rb +2 -2
- data/lib/BioDSL/commands/trim_primer.rb +13 -8
- data/lib/BioDSL/commands/trim_seq.rb +5 -5
- data/lib/BioDSL/commands/uchime_ref.rb +6 -6
- data/lib/BioDSL/commands/uclust.rb +5 -5
- data/lib/BioDSL/commands/unique_values.rb +1 -1
- data/lib/BioDSL/commands/usearch_global.rb +2 -2
- data/lib/BioDSL/commands/usearch_local.rb +2 -2
- data/lib/BioDSL/commands/write_fasta.rb +7 -9
- data/lib/BioDSL/commands/write_fastq.rb +4 -4
- data/lib/BioDSL/commands/write_table.rb +3 -3
- data/lib/BioDSL/commands/write_tree.rb +2 -3
- data/lib/BioDSL/config.rb +2 -2
- data/lib/BioDSL/csv.rb +8 -10
- data/lib/BioDSL/debug.rb +1 -1
- data/lib/BioDSL/fasta.rb +54 -40
- data/lib/BioDSL/fastq.rb +35 -32
- data/lib/BioDSL/filesys.rb +56 -47
- data/lib/BioDSL/fork.rb +1 -1
- data/lib/BioDSL/hamming.rb +1 -1
- data/lib/BioDSL/helpers.rb +1 -1
- data/lib/BioDSL/helpers/aux_helper.rb +1 -1
- data/lib/BioDSL/helpers/email_helper.rb +1 -1
- data/lib/BioDSL/helpers/history_helper.rb +1 -1
- data/lib/BioDSL/helpers/log_helper.rb +1 -1
- data/lib/BioDSL/helpers/options_helper.rb +1 -1
- data/lib/BioDSL/helpers/status_helper.rb +1 -1
- data/lib/BioDSL/html_report.rb +1 -1
- data/lib/BioDSL/math.rb +1 -1
- data/lib/BioDSL/mummer.rb +1 -1
- data/lib/BioDSL/pipeline.rb +1 -1
- data/lib/BioDSL/seq.rb +240 -231
- data/lib/BioDSL/seq/ambiguity.rb +1 -1
- data/lib/BioDSL/seq/assemble.rb +1 -1
- data/lib/BioDSL/seq/backtrack.rb +93 -76
- data/lib/BioDSL/seq/digest.rb +1 -1
- data/lib/BioDSL/seq/dynamic.rb +43 -55
- data/lib/BioDSL/seq/homopolymer.rb +34 -36
- data/lib/BioDSL/seq/kmer.rb +67 -50
- data/lib/BioDSL/seq/levenshtein.rb +35 -40
- data/lib/BioDSL/seq/translate.rb +64 -55
- data/lib/BioDSL/seq/trim.rb +60 -50
- data/lib/BioDSL/serializer.rb +1 -1
- data/lib/BioDSL/stream.rb +1 -1
- data/lib/BioDSL/taxonomy.rb +1 -1
- data/lib/BioDSL/test.rb +1 -1
- data/lib/BioDSL/tmp_dir.rb +1 -1
- data/lib/BioDSL/usearch.rb +1 -1
- data/lib/BioDSL/verbose.rb +1 -1
- data/lib/BioDSL/version.rb +2 -2
- data/test/BioDSL/commands/test_add_key.rb +1 -1
- data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq.rb +1 -1
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
- data/test/BioDSL/commands/test_clip_primer.rb +1 -1
- data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
- data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
- data/test/BioDSL/commands/test_collect_otus.rb +1 -1
- data/test/BioDSL/commands/test_complement_seq.rb +1 -1
- data/test/BioDSL/commands/test_count.rb +1 -1
- data/test/BioDSL/commands/test_count_values.rb +1 -1
- data/test/BioDSL/commands/test_degap_seq.rb +1 -1
- data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
- data/test/BioDSL/commands/test_dump.rb +1 -1
- data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
- data/test/BioDSL/commands/test_genecall.rb +1 -1
- data/test/BioDSL/commands/test_grab.rb +1 -1
- data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
- data/test/BioDSL/commands/test_mask_seq.rb +1 -1
- data/test/BioDSL/commands/test_mean_scores.rb +1 -1
- data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_merge_table.rb +1 -1
- data/test/BioDSL/commands/test_merge_values.rb +1 -1
- data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
- data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
- data/test/BioDSL/commands/test_plot_matches.rb +1 -1
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
- data/test/BioDSL/commands/test_plot_scores.rb +1 -1
- data/test/BioDSL/commands/test_random.rb +1 -1
- data/test/BioDSL/commands/test_read_fasta.rb +1 -1
- data/test/BioDSL/commands/test_read_fastq.rb +1 -1
- data/test/BioDSL/commands/test_read_table.rb +1 -1
- data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
- data/test/BioDSL/commands/test_slice_align.rb +1 -1
- data/test/BioDSL/commands/test_slice_seq.rb +1 -1
- data/test/BioDSL/commands/test_sort.rb +1 -1
- data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
- data/test/BioDSL/commands/test_split_values.rb +1 -1
- data/test/BioDSL/commands/test_trim_primer.rb +1 -1
- data/test/BioDSL/commands/test_trim_seq.rb +1 -1
- data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
- data/test/BioDSL/commands/test_uclust.rb +1 -1
- data/test/BioDSL/commands/test_unique_values.rb +1 -1
- data/test/BioDSL/commands/test_usearch_global.rb +1 -1
- data/test/BioDSL/commands/test_usearch_local.rb +1 -1
- data/test/BioDSL/commands/test_write_fasta.rb +1 -1
- data/test/BioDSL/commands/test_write_fastq.rb +1 -1
- data/test/BioDSL/commands/test_write_table.rb +1 -1
- data/test/BioDSL/commands/test_write_tree.rb +1 -1
- data/test/BioDSL/helpers/test_options_helper.rb +3 -3
- data/test/BioDSL/seq/test_assemble.rb +58 -56
- data/test/BioDSL/seq/test_backtrack.rb +83 -81
- data/test/BioDSL/seq/test_digest.rb +47 -45
- data/test/BioDSL/seq/test_dynamic.rb +66 -64
- data/test/BioDSL/seq/test_homopolymer.rb +35 -33
- data/test/BioDSL/seq/test_kmer.rb +29 -28
- data/test/BioDSL/seq/test_translate.rb +44 -42
- data/test/BioDSL/seq/test_trim.rb +59 -57
- data/test/BioDSL/test_cary.rb +1 -1
- data/test/BioDSL/test_command.rb +2 -2
- data/test/BioDSL/test_csv.rb +34 -31
- data/test/BioDSL/test_debug.rb +31 -31
- data/test/BioDSL/test_fasta.rb +30 -29
- data/test/BioDSL/test_fastq.rb +27 -26
- data/test/BioDSL/test_filesys.rb +28 -27
- data/test/BioDSL/test_fork.rb +29 -28
- data/test/BioDSL/test_math.rb +31 -30
- data/test/BioDSL/test_mummer.rb +1 -1
- data/test/BioDSL/test_pipeline.rb +1 -1
- data/test/BioDSL/test_seq.rb +42 -41
- data/test/BioDSL/test_serializer.rb +35 -33
- data/test/BioDSL/test_stream.rb +28 -27
- data/test/BioDSL/test_taxonomy.rb +38 -37
- data/test/BioDSL/test_test.rb +32 -31
- data/test/BioDSL/test_tmp_dir.rb +1 -1
- data/test/BioDSL/test_usearch.rb +28 -27
- data/test/BioDSL/test_verbose.rb +32 -31
- data/test/helper.rb +34 -31
- metadata +3 -2
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -162,7 +162,7 @@ module BioDSL
|
|
|
162
162
|
ios.each(:uc) do |record|
|
|
163
163
|
record[:RECORD_TYPE] = 'usearch'
|
|
164
164
|
output << record
|
|
165
|
-
@status[:hits_out]
|
|
165
|
+
@status[:hits_out] += 1
|
|
166
166
|
@status[:records_out] += 1
|
|
167
167
|
end
|
|
168
168
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -162,7 +162,7 @@ module BioDSL
|
|
|
162
162
|
ios.each(:uc) do |record|
|
|
163
163
|
record[:RECORD_TYPE] = 'usearch'
|
|
164
164
|
output << record
|
|
165
|
-
@status[:hits_out]
|
|
165
|
+
@status[:hits_out] += 1
|
|
166
166
|
@status[:records_out] += 1
|
|
167
167
|
end
|
|
168
168
|
end
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -137,18 +137,16 @@ module BioDSL
|
|
|
137
137
|
|
|
138
138
|
if (entry = record2entry(record))
|
|
139
139
|
$stdout.puts entry.to_fasta(wrap)
|
|
140
|
-
@status[:sequences_in]
|
|
140
|
+
@status[:sequences_in] += 1
|
|
141
141
|
@status[:sequences_out] += 1
|
|
142
|
-
@status[:residues_in]
|
|
143
|
-
@status[:residues_out]
|
|
142
|
+
@status[:residues_in] += entry.length
|
|
143
|
+
@status[:residues_out] += entry.length
|
|
144
144
|
end
|
|
145
145
|
|
|
146
146
|
write_output(output, record)
|
|
147
147
|
end
|
|
148
148
|
end
|
|
149
149
|
|
|
150
|
-
# rubocop: disable Metrics/AbcSize
|
|
151
|
-
|
|
152
150
|
# Write all sequence entries to a specified file.
|
|
153
151
|
#
|
|
154
152
|
# @param input [Enumerator] The input stream.
|
|
@@ -160,10 +158,10 @@ module BioDSL
|
|
|
160
158
|
|
|
161
159
|
if (entry = record2entry(record))
|
|
162
160
|
ios.puts entry.to_fasta(@options[:wrap])
|
|
163
|
-
@status[:sequences_in]
|
|
161
|
+
@status[:sequences_in] += 1
|
|
164
162
|
@status[:sequences_out] += 1
|
|
165
|
-
@status[:residues_in]
|
|
166
|
-
@status[:residues_out]
|
|
163
|
+
@status[:residues_in] += entry.length
|
|
164
|
+
@status[:residues_out] += entry.length
|
|
167
165
|
end
|
|
168
166
|
|
|
169
167
|
write_output(output, record)
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -91,7 +91,7 @@ module BioDSL
|
|
|
91
91
|
#
|
|
92
92
|
# @return [WriteFastq] Class instance.
|
|
93
93
|
def initialize(options)
|
|
94
|
-
@options
|
|
94
|
+
@options = options
|
|
95
95
|
check_options
|
|
96
96
|
@options[:output] ||= $stdout
|
|
97
97
|
@compress = choose_compression
|
|
@@ -139,7 +139,7 @@ module BioDSL
|
|
|
139
139
|
|
|
140
140
|
if record[:SEQ]
|
|
141
141
|
@status[:sequences_in] += 1
|
|
142
|
-
@status[:residues_in]
|
|
142
|
+
@status[:residues_in] += record[:SEQ].length
|
|
143
143
|
|
|
144
144
|
write_fastq(record, ios) if record[:SEQ_NAME] && record[:SCORES]
|
|
145
145
|
end
|
|
@@ -162,7 +162,7 @@ module BioDSL
|
|
|
162
162
|
|
|
163
163
|
ios.puts entry.to_fastq
|
|
164
164
|
@status[:sequences_out] += 1
|
|
165
|
-
@status[:residues_out]
|
|
165
|
+
@status[:residues_out] += entry.length
|
|
166
166
|
end
|
|
167
167
|
|
|
168
168
|
# Choose compression to use which can either be gzip or bzip2 or no
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -185,7 +185,7 @@ module BioDSL
|
|
|
185
185
|
#
|
|
186
186
|
# @return [WriteTable] Class instance.
|
|
187
187
|
def initialize(options)
|
|
188
|
-
@options
|
|
188
|
+
@options = options
|
|
189
189
|
check_options
|
|
190
190
|
@options[:delimiter] ||= "\t"
|
|
191
191
|
@compress = choose_compression
|
|
@@ -402,7 +402,7 @@ module BioDSL
|
|
|
402
402
|
def align_columns(table)
|
|
403
403
|
@rows.first.each_with_index do |cell, i|
|
|
404
404
|
next unless cell.is_a?(Fixnum) ||
|
|
405
|
-
cell.is_a?(Float)
|
|
405
|
+
cell.is_a?(Float) ||
|
|
406
406
|
cell.delete(',') =~ /^[0-9]+$/
|
|
407
407
|
|
|
408
408
|
table.align_column(i, :right)
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -79,7 +79,6 @@ module BioDSL
|
|
|
79
79
|
@cmd = compile_command
|
|
80
80
|
end
|
|
81
81
|
|
|
82
|
-
# rubocop: disable Metrics/AbcSize
|
|
83
82
|
# rubocop: disable MethodLength
|
|
84
83
|
|
|
85
84
|
# Return command lambda for write_tree.
|
|
@@ -146,7 +145,7 @@ module BioDSL
|
|
|
146
145
|
entry.seq_name ||= i
|
|
147
146
|
|
|
148
147
|
@status[:sequences_in] += 1
|
|
149
|
-
@status[:residues_in]
|
|
148
|
+
@status[:residues_in] += entry.length
|
|
150
149
|
|
|
151
150
|
stdin.puts entry.to_fasta
|
|
152
151
|
end
|
data/lib/BioDSL/config.rb
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -36,7 +36,7 @@ module BioDSL
|
|
|
36
36
|
HISTORY_FILE = File.join(ENV['HOME'], '.BioDSL_history')
|
|
37
37
|
LOG_FILE = File.join(ENV['HOME'], '.BioDSL_log')
|
|
38
38
|
RC_FILE = File.join(ENV['HOME'], '.BioDSLrc')
|
|
39
|
-
STATUS_PROGRESS_INTERVAL = 0.1
|
|
39
|
+
STATUS_PROGRESS_INTERVAL = 0.1 # update progress every n second.
|
|
40
40
|
|
|
41
41
|
options = options_load_rc({}, :pipeline)
|
|
42
42
|
|
data/lib/BioDSL/csv.rb
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of BioDSL (
|
|
24
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
|
@@ -283,15 +283,13 @@ module BioDSL
|
|
|
283
283
|
fields.each do |field|
|
|
284
284
|
field = field.to_num
|
|
285
285
|
|
|
286
|
-
if field.is_a? Fixnum
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
types << nil
|
|
294
|
-
end
|
|
286
|
+
types << if field.is_a? Fixnum
|
|
287
|
+
:to_i
|
|
288
|
+
elsif field.is_a? Float
|
|
289
|
+
:to_f
|
|
290
|
+
elsif field.is_a? String
|
|
291
|
+
:to_s
|
|
292
|
+
end
|
|
295
293
|
end
|
|
296
294
|
|
|
297
295
|
@types = types
|
data/lib/BioDSL/debug.rb
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
# #
|
|
22
22
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
23
|
# #
|
|
24
|
-
# This software is part of the BioDSL
|
|
24
|
+
# This software is part of the BioDSL (www.BioDSL.org). #
|
|
25
25
|
# #
|
|
26
26
|
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
27
|
|
data/lib/BioDSL/fasta.rb
CHANGED
|
@@ -1,45 +1,48 @@
|
|
|
1
|
-
#
|
|
2
|
-
#
|
|
3
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
|
4
|
-
#
|
|
5
|
-
# This program is free software; you can redistribute it and/or
|
|
6
|
-
# modify it under the terms of the GNU General Public License
|
|
7
|
-
# as published by the Free Software Foundation; either version 2
|
|
8
|
-
# of the License, or (at your option) any later version.
|
|
9
|
-
#
|
|
10
|
-
# This program is distributed in the hope that it will be useful,
|
|
11
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
-
# GNU General Public License for more details.
|
|
14
|
-
#
|
|
15
|
-
# You should have received a copy of the GNU General Public License
|
|
16
|
-
# along with this program; if not, write to the Free Software
|
|
17
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
27
29
|
module BioDSL
|
|
28
30
|
# Error class for all exceptions to do with FASTA.
|
|
29
31
|
class FastaError < StandardError; end
|
|
30
32
|
|
|
33
|
+
# Class for reading and writing FASTA files.
|
|
31
34
|
class Fasta
|
|
32
35
|
def self.open(*args)
|
|
33
36
|
ios = IO.open(*args)
|
|
34
37
|
|
|
35
38
|
if block_given?
|
|
36
39
|
begin
|
|
37
|
-
yield
|
|
40
|
+
yield new(ios)
|
|
38
41
|
ensure
|
|
39
42
|
ios.close
|
|
40
43
|
end
|
|
41
44
|
else
|
|
42
|
-
return
|
|
45
|
+
return new(ios)
|
|
43
46
|
end
|
|
44
47
|
end
|
|
45
48
|
|
|
@@ -60,13 +63,13 @@ module BioDSL
|
|
|
60
63
|
def initialize(io)
|
|
61
64
|
@io = io
|
|
62
65
|
@seq_name = nil
|
|
63
|
-
@seq =
|
|
66
|
+
@seq = ''
|
|
64
67
|
@got_first = nil
|
|
65
68
|
@got_last = nil
|
|
66
69
|
end
|
|
67
70
|
|
|
68
71
|
def each
|
|
69
|
-
while entry = next_entry
|
|
72
|
+
while (entry = next_entry)
|
|
70
73
|
yield entry
|
|
71
74
|
end
|
|
72
75
|
end
|
|
@@ -84,24 +87,33 @@ module BioDSL
|
|
|
84
87
|
next if line.empty?
|
|
85
88
|
|
|
86
89
|
if line[0] == '>'
|
|
87
|
-
if
|
|
88
|
-
|
|
90
|
+
if !@got_first && !@seq.empty?
|
|
91
|
+
unless @seq.empty?
|
|
92
|
+
fail FastaError, 'Bad FASTA format -> content before Fasta ' \
|
|
93
|
+
"header: #{@seq}"
|
|
94
|
+
end
|
|
89
95
|
end
|
|
90
96
|
|
|
91
97
|
@got_first = true
|
|
92
98
|
|
|
93
99
|
if @seq_name
|
|
94
100
|
entry = Seq.new(seq_name: @seq_name, seq: @seq)
|
|
95
|
-
@seq_name = line[1
|
|
96
|
-
@seq =
|
|
101
|
+
@seq_name = line[1..-1]
|
|
102
|
+
@seq = ''
|
|
97
103
|
|
|
98
|
-
|
|
104
|
+
if @seq_name.empty?
|
|
105
|
+
fail FastaError, 'Bad FASTA format -> truncated Fasta header: ' \
|
|
106
|
+
'no content after \'>\''
|
|
107
|
+
end
|
|
99
108
|
|
|
100
109
|
return entry
|
|
101
110
|
else
|
|
102
|
-
@seq_name = line[1
|
|
111
|
+
@seq_name = line[1..-1]
|
|
103
112
|
|
|
104
|
-
|
|
113
|
+
if @seq_name.empty?
|
|
114
|
+
fail FastaError, 'Bad FASTA format -> truncated Fasta header: ' \
|
|
115
|
+
' no content after \'>\''
|
|
116
|
+
end
|
|
105
117
|
end
|
|
106
118
|
else
|
|
107
119
|
@seq << line
|
|
@@ -115,16 +127,18 @@ module BioDSL
|
|
|
115
127
|
return entry
|
|
116
128
|
end
|
|
117
129
|
|
|
118
|
-
if
|
|
119
|
-
|
|
130
|
+
if !@got_last && !@seq.empty?
|
|
131
|
+
fail FastaError, 'Bad FASTA format -> content witout Fasta header: ' +
|
|
132
|
+
@seq
|
|
120
133
|
end
|
|
121
134
|
|
|
122
135
|
nil
|
|
123
136
|
end
|
|
124
137
|
|
|
138
|
+
# Class for FASTA IO
|
|
125
139
|
class IO < Filesys
|
|
126
140
|
def each
|
|
127
|
-
|
|
141
|
+
until @io.eof?
|
|
128
142
|
yield @io.gets
|
|
129
143
|
end
|
|
130
144
|
end
|
data/lib/BioDSL/fastq.rb
CHANGED
|
@@ -1,29 +1,31 @@
|
|
|
1
|
-
#
|
|
2
|
-
#
|
|
3
|
-
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk).
|
|
4
|
-
#
|
|
5
|
-
# This program is free software; you can redistribute it and/or
|
|
6
|
-
# modify it under the terms of the GNU General Public License
|
|
7
|
-
# as published by the Free Software Foundation; either version 2
|
|
8
|
-
# of the License, or (at your option) any later version.
|
|
9
|
-
#
|
|
10
|
-
# This program is distributed in the hope that it will be useful,
|
|
11
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
-
# GNU General Public License for more details.
|
|
14
|
-
#
|
|
15
|
-
# You should have received a copy of the GNU General Public License
|
|
16
|
-
# along with this program; if not, write to the Free Software
|
|
17
|
-
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (http://maasha.github.io/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
27
|
|
|
28
|
+
# Namespace for BioDSL.
|
|
27
29
|
module BioDSL
|
|
28
30
|
# Error class for all exceptions to do with FASTQ.
|
|
29
31
|
class FastqError < StandardError; end
|
|
@@ -35,21 +37,21 @@ module BioDSL
|
|
|
35
37
|
|
|
36
38
|
if block_given?
|
|
37
39
|
begin
|
|
38
|
-
yield
|
|
40
|
+
yield new(ios)
|
|
39
41
|
ensure
|
|
40
42
|
ios.close
|
|
41
43
|
end
|
|
42
44
|
else
|
|
43
|
-
return
|
|
45
|
+
return new(ios)
|
|
44
46
|
end
|
|
45
47
|
end
|
|
46
48
|
|
|
47
49
|
def initialize(io)
|
|
48
|
-
@io
|
|
50
|
+
@io = io
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
def each
|
|
52
|
-
while entry = next_entry
|
|
54
|
+
while (entry = next_entry)
|
|
53
55
|
yield entry
|
|
54
56
|
end
|
|
55
57
|
end
|
|
@@ -58,17 +60,18 @@ module BioDSL
|
|
|
58
60
|
# as a Seq object. If no entry is found or eof then nil is returned.
|
|
59
61
|
def next_entry
|
|
60
62
|
return nil if @io.eof?
|
|
61
|
-
seq_name = @io.gets[1
|
|
63
|
+
seq_name = @io.gets[1..-2]
|
|
62
64
|
seq = @io.gets.chomp
|
|
63
65
|
@io.gets
|
|
64
|
-
qual
|
|
66
|
+
qual = @io.gets.chomp
|
|
65
67
|
|
|
66
68
|
Seq.new(seq_name: seq_name, seq: seq, qual: qual)
|
|
67
69
|
end
|
|
68
70
|
|
|
71
|
+
# Class for FASTQ IO.
|
|
69
72
|
class IO < Filesys
|
|
70
73
|
def each
|
|
71
|
-
|
|
74
|
+
until @io.eof?
|
|
72
75
|
yield @io.gets
|
|
73
76
|
end
|
|
74
77
|
end
|