BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
module BioDSL
  # Class for creating HTML reports from an executed BioDSL pipeline.
  #
  # Every section of the report is produced by rendering a HAML template
  # located in the directory returned by #root_dir. The HtmlReport instance
  # itself is passed as the template scope (presumably so the templates can
  # call the private render_* helpers and predicates defined below).
  class HtmlReport
    require 'tilt/haml'
    require 'base64'
    require 'BioDSL/helpers/options_helper'

    include OptionsHelper

    # Constructor for HtmlReport.
    #
    # @param pipeline [BioDSL::Pipeline] Pipeline object. Must respond to
    #   #commands and #to_s.
    def initialize(pipeline)
      @pipeline = pipeline
      @commands = pipeline.commands
    end

    # Render HTML output.
    #
    # @return [String] Output of rendering the layout template.
    def to_html
      render('layout.html.haml', self, pipeline: @pipeline.to_s,
                                       commands: @commands)
    end

    private

    # Render HTML templates.
    #
    # @param template [String] Template file name relative to #root_dir.
    # @param scope    [Object] Scope the template is evaluated in.
    # @param args     [Hash]   Local variables handed to the template.
    #
    # @return [String] Rendered template.
    def render(template, scope, args = {})
      Tilt.new(File.join(root_dir, template)).render(scope, args)
    end

    # Render HTML CSS section.
    def render_css
      render('css.html.haml', self)
    end

    # Render HTML pipeline section
    #
    # The pipeline string is cut into its dot-separated calls which are
    # re-joined with ".\n"; the very first line break is then removed again,
    # so the first two segments stay on one line.
    #
    # @param pipeline [String] String from BioDSL::Pipeline#to_s
    def render_pipeline(pipeline)
      calls    = pipeline.scan(/[^.]+\(.*?\)|[^.(]+/)
      pipeline = calls.join(".\n").sub(/\n/, '')

      render('pipeline.html.haml', self, pipeline: pipeline)
    end

    # Render HTML overview section.
    #
    # @param commands [Array] List of commands from a pipeline.
    def render_overview(commands)
      render('overview.html.haml', self, commands: commands)
    end

    # Render HTML command section.
    #
    # @param command [BioDSL::Command] Command object.
    # @param index   [Integer] Passed to the template as +index+ (presumably
    #   the position of the command in the pipeline - confirm with caller).
    def render_command(command, index)
      render('command.html.haml', self, command: command, index: index)
    end

    # Render HTML status section.
    #
    # Status entries whose key starts with 'time' are left out here.
    #
    # @param command [BioDSL::Command] Command object.
    def render_status(command)
      stats = command.status.reject { |k, _| k.to_s.start_with?('time') }

      # NOTE(review): the local is spelled 'statsus'. It is kept unchanged
      # because it has to match the name used inside status.html.haml, which
      # is not visible from this file - verify before renaming.
      render('status.html.haml', self, exit_status: command.run_status,
                                       statsus: stats)
    end

    # Render HTML time section.
    #
    # @param status [BioDSL::Status] Status object.
    def render_time(status)
      render('time.html.haml', self, status: status)
    end

    # Render HTML input files section.
    #
    # @param options [Hash] Command options hash.
    # @option options [String] :input File glob expression, expanded with
    #   options_glob before it is handed to the template.
    def render_input_files(options)
      render('input_files.html.haml', self,
             files: options_glob(options[:input]))
    end

    # Render HTML output file section.
    #
    # @param options [Hash] Command options hash.
    def render_output_files(options)
      render('output_files.html.haml', self, options: options)
    end

    # Render PNG data.
    #
    # The file at options[:output] is embedded in the report as a Base64
    # encoded data URI.
    #
    # @param options [Hash] Command options hash.
    # @option options [String] :output Path to the PNG file.
    def render_png(options)
      path = options[:output]

      # PNG is binary data, so read it in binary mode, and use strict
      # encoding so that no line breaks end up inside the data URI.
      png_data = 'data:image/png;base64,' +
                 Base64.strict_encode64(File.binread(path))

      render('png.html.haml', self, path: path, png_data: png_data)
    end

    # Detect if any input options are set.
    #
    # @param options [Hash] Options hash.
    # @option options [String] :input File glob expression.
    #
    # @return [Boolean]
    def input?(options)
      options[:input] ? true : false
    end

    # Detect if any output options are set.
    #
    # @param options [Hash] Options hash.
    # @option options [String] :output Path to output file.
    #
    # @return [Boolean]
    def output?(options)
      options[:output] ? true : false
    end

    # Detect if any PNG file is available.
    #
    # @param options [Hash] Options hash.
    # @option options [String] :output   Path to output file.
    # @option options [Symbol] :terminal Plot type.
    #
    # @return [Boolean] true only if the terminal is :png and the output file
    #   exists.
    def png?(options)
      path = options[:output]

      return false unless path && options[:terminal] == :png

      File.exist?(path)
    end

    # Return the path of the HTML root dir.
    #
    # @return [String] Root dir (the 'www' directory two levels above the
    #   directory of this file).
    def root_dir
      File.join(File.dirname(__FILE__), '..', '..', 'www')
    end

    # Return the help URL for a given command.
    #
    # The command name is converted from snake_case to CamelCase to form the
    # last part of the URL.
    #
    # @param command [Symbol] Command name.
    #
    # @return [String] URL to the rubydoc.info page.
    def help_url(command)
      camel = command.to_s.split('_').map(&:capitalize).join

      'http://www.rubydoc.info/gems/BioDSL/' \
      "#{BioDSL::VERSION}/BioDSL/#{camel}"
    end
  end
end
|
data/lib/BioDSL/math.rb
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
29
|
+
module BioDSL
  # Geometry helper methods, namespaced as BioDSL::Math. The core ::Math
  # module is referenced explicitly where its functions are needed.
  module Math
    # Class method to calculate the distance from a point to a line.
    # The point and line are given as pairs of coordinates.
    #
    # The two-point form of the distance formula is used instead of the
    # slope/intercept form, so vertical lines (x1 == x2) do not cause a
    # division by zero.
    #
    # @param px [Numeric] point x coordinate
    # @param py [Numeric] point y coordinate
    # @param x1 [Numeric] line 1 x coordinate
    # @param y1 [Numeric] line 1 y coordinate
    # @param x2 [Numeric] line 2 x coordinate
    # @param y2 [Numeric] line 2 y coordinate
    #
    # @return [Float] Distance from the point to the line. If both line
    #   coordinates are identical the distance to that single point is
    #   returned.
    def self.dist_point2line(px, py, x1, y1, x2, y2)
      dx     = (x2 - x1).to_f
      dy     = (y2 - y1).to_f
      length = ::Math.hypot(dx, dy)

      # Degenerate line: the two coordinates do not define a direction.
      return dist_point2point(px, py, x1, y1) if length.zero?

      (dy * px - dx * py + x2 * y1 - y2 * x1).abs / length
    end

    # Class method to calculate the distance between two points given
    # as pairs of coordinates.
    #
    # @param x1 [#to_f] point 1 x coordinate
    # @param y1 [#to_f] point 1 y coordinate
    # @param x2 [#to_f] point 2 x coordinate
    # @param y2 [#to_f] point 2 y coordinate
    #
    # @return [Float] Euclidean distance between the points.
    def self.dist_point2point(x1, y1, x2, y2)
      ::Math.hypot(x2.to_f - x1.to_f, y2.to_f - y1.to_f)
    end
  end
end
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
3
|
+
# #
|
|
4
|
+
# This program is free software; you can redistribute it and/or #
|
|
5
|
+
# modify it under the terms of the GNU General Public License #
|
|
6
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
7
|
+
# of the License, or (at your option) any later version. #
|
|
8
|
+
# #
|
|
9
|
+
# This program is distributed in the hope that it will be useful, #
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
12
|
+
# GNU General Public License for more details. #
|
|
13
|
+
# #
|
|
14
|
+
# You should have received a copy of the GNU General Public License #
|
|
15
|
+
# along with this program; if not, write to the Free Software #
|
|
16
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
17
|
+
# USA. #
|
|
18
|
+
# #
|
|
19
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
20
|
+
# #
|
|
21
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
|
+
# #
|
|
23
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
|
|
27
|
+
# Namespace for BioDSL.
|
|
28
|
+
module BioDSL
  # Error class for Mummer errors.
  MummerError = Class.new(StandardError)

  # rubocop: disable ClassLength

  # Class for executing MUMmer and parsing MUMmer results.
  class Mummer
    # Accepted values for the :direction option.
    DIRECTIONS = [:forward, :reverse, :both].freeze

    # Run MUMmer on two sequences and iterate over the matches found.
    #
    # @param seq1    [BioDSL::Seq] Sequence 1.
    # @param seq2    [BioDSL::Seq] Sequence 2.
    # @param options [Hash]        Options hash.
    # @option options [Integer] :length_min Minimum match length (default 20).
    # @option options [Symbol]  :direction  :forward, :reverse or :both
    #   (default :both).
    #
    # @yield  [Mummer::Match] A match object
    # @return [Enumerable] An Enumerable
    def self.each_mem(seq1, seq2, options = {}, &block)
      new(seq1, seq2, options).each_mem(&block)
    end

    # Constructor for Mummer class.
    #
    # @param seq1    [BioDSL::Seq] Sequence 1.
    # @param seq2    [BioDSL::Seq] Sequence 2.
    # @param options [Hash]        Options hash.
    #
    # @raise [BioDSL::MummerError] on bad options.
    #
    # @return [Mummer] Class instance.
    def initialize(seq1, seq2, options = {})
      @seq1    = seq1
      @seq2    = seq2
      @options = options.dup # Defaults must not leak into the caller's hash.
      @command = []
      @q_id    = nil
      @dir     = nil

      default_options
      check_options
    end

    # Write both sequences to temporary FASTA files, run MUMmer on them and
    # yield each match parsed from the MUMmer output.
    #
    # @yield  [Mummer::Match] A match object
    # @return [Enumerable] An Enumerable
    def each_mem
      return to_enum :each_mem unless block_given?

      TmpDir.create('in1', 'in2', 'out') do |file_in1, file_in2, file_out|
        BioDSL::Fasta.open(file_in1, 'w') { |io| io.puts @seq1.to_fasta }
        BioDSL::Fasta.open(file_in2, 'w') { |io| io.puts @seq2.to_fasta }

        execute(file_in1, file_in2, file_out)

        File.open(file_out) do |io|
          while (match = get_match(io))
            yield match
          end
        end
      end
    end

    private

    # Get a match if possible.
    #
    # Header lines update the current query ID and direction, which are kept
    # in instance variables so they carry over to the following match lines
    # and to subsequent calls.
    #
    # @param io [IO] IO stream.
    #
    # @return [Match, nil] The next match, or nil if the stream holds no
    #   further match.
    def get_match(io)
      io.each do |line|
        line.chomp!

        case line
        when /^> (\S+)\s+Reverse\s+Len = \d+$/
          @q_id = Regexp.last_match(1)
          @dir  = 'reverse'
        when /^> (\S+)\s+Len = \d+$/
          @q_id = Regexp.last_match(1)
          @dir  = 'forward'
        when /^\s*(.\S+)\s+(\d+)\s+(\d+)\s+(\d+)$/
          s_id    = Regexp.last_match(1)
          s_beg   = Regexp.last_match(2).to_i - 1 # Reported positions are
          q_beg   = Regexp.last_match(3).to_i - 1 # decremented by one.
          hit_len = Regexp.last_match(4).to_i

          return Match.new(@q_id, s_id, @dir, s_beg, q_beg, hit_len)
        end
      end

      nil
    end

    # Check that the options are OK
    #
    # The type of :length_min is checked before its value, so a non-numeric
    # value is reported as a MummerError rather than failing in the
    # comparison.
    #
    # @raise [BioDSL::MummerError] on bad options.
    def check_options
      check_length_min_type
      check_length_min_value
      check_direction
    end

    # Check that the value of :length_min is OK.
    #
    # @raise [BioDSL::MummerError] on bad length_min value.
    def check_length_min_value
      return if @options[:length_min] > 0

      fail MummerError, "Bad length_min: #{@options[:length_min]}"
    end

    # Check that the type of :length_min is OK.
    #
    # @raise [BioDSL::MummerError] on bad length_min type.
    def check_length_min_type
      # Integer rather than Fixnum: Fixnum is gone from current Ruby versions.
      return if @options[:length_min].is_a?(Integer)

      fail MummerError, "Bad length_min type: #{@options[:length_min].class}"
    end

    # Check that the value of :direction is OK.
    #
    # @raise [BioDSL::MummerError] on bad direction.
    def check_direction
      return if DIRECTIONS.include?(@options[:direction])

      fail MummerError, "Bad direction: #{@options[:direction]}"
    end

    # Set some sensible default options.
    def default_options
      @options[:length_min] ||= 20
      @options[:direction]  ||= :both
    end

    # Execute MUMmer.
    #
    # @param file_in1 [String] Path to sequence file 1.
    # @param file_in2 [String] Path to sequence file 2.
    # @param file_out [String] Path to output file.
    #
    # @raise [RuntimeError] if the command could not be run or exited with a
    #   non-zero status.
    def execute(file_in1, file_in2, file_out)
      cmd = compile_command(file_in1, file_in2, file_out)

      $stderr.puts "Running command: #{cmd}" if BioDSL.verbose

      # Kernel#system returns true only on a zero exit status, so there is no
      # need to depend on $CHILD_STATUS from the English library.
      return if system(cmd)

      fail "Error running command: #{cmd}"
    end

    # Compile a command for execution of mummer.
    #
    # @param file_in1 [String] Path to sequence file 1.
    # @param file_in2 [String] Path to sequence file 2.
    # @param file_out [String] Path to output file.
    #
    # @return [String] Command string.
    def compile_command(file_in1, file_in2, file_out)
      # Start from scratch on every call; appending to the list from a
      # previous call would produce a doubled command line.
      @command = ['mummer']
      @command << '-c' # report position of revcomp match relative to query seq.
      @command << '-L' # show length of query seq in header.
      @command << '-F' # force 4-column output.
      @command << "-l #{@options[:length_min]}"
      @command << '-n' # nucleotides only [atcg].

      case @options[:direction]
      when :reverse then @command << '-r' # only compute reverse matches.
      when :both    then @command << '-b' # compute forward and reverse matches.
      end

      @command << file_in1
      @command << file_in2
      @command << "> #{file_out}"
      @command << '2>&1' unless BioDSL.verbose

      @command.join(' ')
    end

    # A single match between query and subject.
    #
    # q_beg and s_beg are the start positions and hit_len the match length;
    # the end positions are derived from these and are inclusive.
    Match = Struct.new(:q_id, :s_id, :dir, :s_beg, :q_beg, :hit_len) do
      # @return [Integer] Last query position covered by the match.
      def q_end
        q_beg + hit_len - 1
      end

      # @return [Integer] Last subject position covered by the match.
      def s_end
        s_beg + hit_len - 1
      end
    end
  end
end
|