genevalidator 1.6.2 → 1.6.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +147 -76
- data/Rakefile +1 -1
- data/aux/files/css/genevalidator.compiled.min.css +16 -0
- data/aux/files/css/{bootstrap.min.css → src/bootstrap.min.css} +0 -0
- data/aux/files/css/{font-awesome.min.css → src/font-awesome.min.css} +0 -0
- data/aux/files/css/{style.css → src/style.css} +0 -0
- data/aux/files/js/genevalidator.compiled.min.js +28 -0
- data/aux/files/js/{bootstrap.min.js → src/bootstrap.min.js} +0 -0
- data/aux/files/js/{d3.v3.min.js → src/d3.v3.min.js} +0 -0
- data/aux/files/js/{jquery-2.1.1.min.js → src/jquery-2.1.1.min.js} +0 -0
- data/aux/files/js/{jquery.tablesorter.min.js → src/jquery.tablesorter.min.js} +0 -0
- data/aux/files/js/src/plots.js +814 -0
- data/aux/files/js/src/script.js +43 -0
- data/aux/json_header.erb +6 -6
- data/aux/json_query.erb +2 -1
- data/aux/template_footer.erb +0 -11
- data/aux/template_header.erb +4 -4
- data/aux/template_query.erb +1 -1
- data/bin/genevalidator +8 -6
- data/genevalidator.gemspec +1 -1
- data/lib/genevalidator.rb +7 -5
- data/lib/genevalidator/arg_validation.rb +12 -9
- data/lib/genevalidator/blast.rb +18 -11
- data/lib/genevalidator/clusterization.rb +35 -31
- data/lib/genevalidator/exceptions.rb +0 -1
- data/lib/genevalidator/get_raw_sequences.rb +115 -69
- data/lib/genevalidator/hsp.rb +8 -8
- data/lib/genevalidator/json_to_gv_results.rb +4 -4
- data/lib/genevalidator/output.rb +40 -41
- data/lib/genevalidator/pool.rb +5 -4
- data/lib/genevalidator/query.rb +37 -0
- data/lib/genevalidator/tabular_parser.rb +3 -4
- data/lib/genevalidator/validation.rb +16 -11
- data/lib/genevalidator/validation_alignment.rb +17 -23
- data/lib/genevalidator/validation_blast_reading_frame.rb +3 -3
- data/lib/genevalidator/validation_duplication.rb +8 -18
- data/lib/genevalidator/validation_gene_merge.rb +11 -9
- data/lib/genevalidator/validation_length_cluster.rb +8 -11
- data/lib/genevalidator/validation_length_rank.rb +5 -4
- data/lib/genevalidator/validation_open_reading_frame.rb +5 -5
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +2 -1
- data/test/test_blast.rb +4 -3
- data/test/test_extended_array_methods.rb +2 -1
- data/test/{test_sequences.rb → test_query.rb} +5 -23
- data/test/test_validation_open_reading_frame.rb +7 -7
- data/test/test_validations.rb +8 -6
- metadata +16 -16
- data/aux/app_template_footer.erb +0 -1
- data/aux/app_template_header.erb +0 -12
- data/aux/files/js/plots.js +0 -828
- data/aux/files/js/script.js +0 -71
- data/lib/genevalidator/sequences.rb +0 -101
@@ -0,0 +1,43 @@
|
|
1
|
+
$(document).ready(function() {
|
2
|
+
if (window.chrome && (window.location.protocol === 'file:')){
|
3
|
+
$('#mainbrowseralertText').text("Some features are not supported in this browser and have been disabled.");
|
4
|
+
$('#browseralert').modal();
|
5
|
+
}
|
6
|
+
GV.toggleOverviewBtn(); // write overview to overview section
|
7
|
+
initTableSorter();
|
8
|
+
$("[data-toggle='tooltip']").tooltip(); //ToolTip
|
9
|
+
});
|
10
|
+
|
11
|
+
$(document).on( "click", "td, .plot_btn", function( event ) {
|
12
|
+
if ($(this).hasClass('success') || $(this).hasClass('danger')){
|
13
|
+
var title = $(this).attr('title');
|
14
|
+
var val = title.replace(/[ \/]/g, '');
|
15
|
+
GV.addData(this, val);
|
16
|
+
} else if ($(this).hasClass('plot_btn')){
|
17
|
+
GV.addData(this, 'all');
|
18
|
+
}
|
19
|
+
});
|
20
|
+
|
21
|
+
function initTableSorter() {
|
22
|
+
$.tablesorter.addParser({
|
23
|
+
id: 'star_scores', // referenced later when initialising the tablesorter
|
24
|
+
is: function() {
|
25
|
+
return false; // return false so this parser is not auto detected
|
26
|
+
},
|
27
|
+
format: function(s, table, cell, cellIndex) {
|
28
|
+
var $cell = $(cell);
|
29
|
+
if (cellIndex === 1) {
|
30
|
+
return $cell.attr('data-score') || s;
|
31
|
+
}
|
32
|
+
return s;
|
33
|
+
},
|
34
|
+
parsed: false,
|
35
|
+
type: 'numeric' // Setting type of data...
|
36
|
+
});
|
37
|
+
$('table').tablesorter({
|
38
|
+
headers: {
|
39
|
+
1 : { sorter: 'star_scores' } // Telling it to use custom parser...
|
40
|
+
},
|
41
|
+
sortList: [[0,0]],
|
42
|
+
});
|
43
|
+
}
|
data/aux/json_header.erb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/
|
1
|
+
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
|
2
2
|
<body>
|
3
3
|
<div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
|
4
4
|
<div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
|
5
|
-
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">
|
6
|
-
<div class="container"><span class="menu_icon pull-right"><a href="#"
|
7
|
-
<div id="
|
5
|
+
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
|
6
|
+
<div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
|
7
|
+
<div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
|
8
8
|
<table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
|
9
9
|
<tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition <span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits <span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
|
10
10
|
<% @json_array[0]['validations'].each do |_short_header, item| %>
|
11
|
-
<th class="sorter-false"><b><%= item['header']
|
12
|
-
<% if item['header'] == "
|
11
|
+
<th class="sorter-false"><b><%= item['header'] %> </b>
|
12
|
+
<% if item['header'] == "Length Cluster" || item['header'] == "Gene Merge" || item['header'] == "Main ORF" || item['header'] == "Missing/Extra Sequences" %>
|
13
13
|
<span data-toggle="tooltip" title="Charts available for this validation" data-placement="top"><i class="fa fa-bar-chart-o chartIcon"></i></span> <span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
|
14
14
|
<% else %>
|
15
15
|
<span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
|
data/aux/json_query.erb
CHANGED
@@ -4,7 +4,8 @@
|
|
4
4
|
<td title="Definition"><%= @row['definition'] %></td>
|
5
5
|
<td title="No. Hits"><%= @row['no_hits'] %></td>
|
6
6
|
<% @row['validations'].each do |_short_header, item| %>
|
7
|
-
<td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print']
|
7
|
+
<td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print'].gsub(' ', ' ').gsub('; ', '; ')
|
8
|
+
%></td>
|
8
9
|
<% end %>
|
9
10
|
<% if @row['validations'].select{|_short_header, item| item['graphs'] != nil}.map{|_short_header, item| item['graphs'].length}.inject(0){|r, e| r+e } != 0 %>
|
10
11
|
<td><button title="Show plots" class="plot_btn btn btn-default"><i class="fa fa-bar-chart-o"></i></button></td>
|
data/aux/template_footer.erb
CHANGED
@@ -1,15 +1,4 @@
|
|
1
1
|
</tbody></table>
|
2
|
-
<script>
|
3
|
-
var evaluation_div = document.getElementById('report_1');
|
4
|
-
var content_less = "<br><%= less %><div class=\"clearfix\"></div><br><button type=\"button\" onClick='add_content()' class=\"btn btn-primary btn-sm\"> Show More</button>";
|
5
|
-
var content = "<%= evaluation %> <div class=\"clearfix\"></div><br><button type=\"button\" onClick='evaluation_div.innerHTML=content_less' class=\"btn btn-primary btn-sm\"> Show Less</button>";
|
6
|
-
evaluation_div.innerHTML = content_less;
|
7
|
-
function add_content(){
|
8
|
-
evaluation_div = document.getElementById('report_1');
|
9
|
-
evaluation_div.innerHTML=content;
|
10
|
-
addOverallPlot('files/json/overview.json');
|
11
|
-
}
|
12
|
-
</script>
|
13
2
|
</div>
|
14
3
|
<% if output_files.length > 1 %>
|
15
4
|
<nav><ul class="pagination">
|
data/aux/template_header.erb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/
|
1
|
+
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
|
2
2
|
<body>
|
3
3
|
<div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
|
4
4
|
<div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
|
5
|
-
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">
|
6
|
-
<div class="container"><span class="menu_icon pull-right"><a href="#"
|
7
|
-
<div id="
|
5
|
+
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
|
6
|
+
<div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
|
7
|
+
<div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
|
8
8
|
<table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
|
9
9
|
<tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition <span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits <span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
|
10
10
|
<% @validations.each do |item| %>
|
data/aux/template_query.erb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
<tr data-target="toggle<%= @idx %>" data-jsonFile="files/json/<%= @config[:filename] %>_<%= @idx %>.json">
|
2
2
|
<td title="idx"><%= @idx %></td>
|
3
|
-
<td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score
|
3
|
+
<td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score %>%;"></div></div></td>
|
4
4
|
<td title="Definition"><%= @prediction_def %></td>
|
5
5
|
<td title="No. Hits"><%= @nr_hits %></td>
|
6
6
|
<% @validations.each do |item| %>
|
data/bin/genevalidator
CHANGED
@@ -11,7 +11,7 @@ opt_parser = OptionParser.new do |opts|
|
|
11
11
|
opts.banner = <<BANNER
|
12
12
|
|
13
13
|
USAGE:
|
14
|
-
|
14
|
+
genevalidator [OPTIONS] Input_File
|
15
15
|
|
16
16
|
ARGUMENTS:
|
17
17
|
Input_File: Path to the input fasta file containing the predicted sequences.
|
@@ -38,7 +38,7 @@ BANNER
|
|
38
38
|
opts.on('-d', '--db [BLAST_DATABASE]',
|
39
39
|
'Path to the BLAST database',
|
40
40
|
'GeneValidator also supports remote databases:',
|
41
|
-
'e.g.
|
41
|
+
'e.g. genevalidator -d "swissprot -remote" Input_File') do |db|
|
42
42
|
opt[:db] = db
|
43
43
|
end
|
44
44
|
|
@@ -50,8 +50,8 @@ BANNER
|
|
50
50
|
end
|
51
51
|
|
52
52
|
opts.on('-j', '--json_file [JSON_FILE]',
|
53
|
-
'Generate
|
54
|
-
'produced
|
53
|
+
'Generate HTML report from a JSON file (or a subset of a JSON file)',
|
54
|
+
'produced by GeneValidator') do |json|
|
55
55
|
opt[:json_file] = json
|
56
56
|
end
|
57
57
|
|
@@ -80,7 +80,7 @@ BANNER
|
|
80
80
|
'BLAST and Mafft within GeneValidator.') do |num_threads|
|
81
81
|
opt[:num_threads] = num_threads
|
82
82
|
end
|
83
|
-
|
83
|
+
|
84
84
|
opts.on('-r', '--raw_sequences [raw_seq]',
|
85
85
|
'Supply a fasta file of the raw sequences of all BLAST hits present',
|
86
86
|
'in the supplied BLAST XML or BLAST tabular file.') do |raw_seq|
|
@@ -90,7 +90,7 @@ BANNER
|
|
90
90
|
opts.on('-b', '--binaries [binaries]', Array,
|
91
91
|
'Path to BLAST and MAFFT bin folders (is added to $PATH variable)',
|
92
92
|
'To be provided as follows:',
|
93
|
-
'
|
93
|
+
'e.g. genevalidator -b /blast/bin/path/ -b /mafft/bin/path/') do |bin|
|
94
94
|
(opt[:bin] ||= []).concat(bin)
|
95
95
|
end
|
96
96
|
|
@@ -127,9 +127,11 @@ start = Time.now
|
|
127
127
|
if opt[:extract_raw_seqs] && opt[:raw_sequences].nil?
|
128
128
|
GeneValidator.opt = opt
|
129
129
|
GeneValidator.config = {}
|
130
|
+
# GVArgValidation.run_raw_seqs
|
130
131
|
GeneValidator::RawSequences.run
|
131
132
|
elsif opt[:json_file]
|
132
133
|
GeneValidator.opt = opt
|
134
|
+
# GVArgValidation.run_json_to_gv_results
|
133
135
|
GeneValidator::JsonToGVResults.run
|
134
136
|
else
|
135
137
|
GeneValidator.init(opt)
|
data/genevalidator.gemspec
CHANGED
data/lib/genevalidator.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'fileutils'
|
2
|
-
|
3
2
|
require 'bio-blastxmlparser'
|
4
3
|
|
5
4
|
require 'genevalidator/arg_validation'
|
@@ -68,16 +67,19 @@ module GeneValidator
|
|
68
67
|
# Parse the blast output and run validations
|
69
68
|
def run
|
70
69
|
# Run BLAST on all sequences (generates @opt[:blast_xml_file])
|
71
|
-
#
|
70
|
+
# if no BLAST OUTPUT file provided...
|
72
71
|
unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
|
73
72
|
BlastUtils.run_blast_on_input_file
|
74
73
|
end
|
75
|
-
# Obtain fasta file of all BLAST hits
|
76
|
-
|
74
|
+
# Obtain fasta file of all BLAST hits if running align or dup validations
|
75
|
+
if @opt[:validations].include?('align') ||
|
76
|
+
@opt[:validations].include?('dup')
|
77
|
+
RawSequences.run unless @opt[:raw_sequences]
|
78
|
+
end
|
77
79
|
# Run Validations
|
78
80
|
iterator = parse_blast_output_file
|
79
81
|
(Validations.new).run_validations(iterator)
|
80
|
-
|
82
|
+
|
81
83
|
Output.write_json_file(@config[:json_output], @config[:json_file])
|
82
84
|
Output.print_footer(@overview, @config)
|
83
85
|
end
|
@@ -26,7 +26,6 @@ module GeneValidator
|
|
26
26
|
check_num_threads
|
27
27
|
|
28
28
|
export_bin_dirs unless @opt[:bin].nil?
|
29
|
-
|
30
29
|
Blast.validate(opt) unless @opt[:test]
|
31
30
|
assert_mafft_installation
|
32
31
|
end
|
@@ -53,7 +52,8 @@ module GeneValidator
|
|
53
52
|
$stderr.puts 'Number of threads can not be lower than 0'
|
54
53
|
end
|
55
54
|
return unless @opt[:num_threads] > 256
|
56
|
-
$stderr.puts "Number of threads set at #{@opt[:num_threads]} is
|
55
|
+
$stderr.puts "Number of threads set at #{@opt[:num_threads]} is" \
|
56
|
+
' unusually high.'
|
57
57
|
end
|
58
58
|
|
59
59
|
def assert_BLAST_output_files
|
@@ -69,8 +69,8 @@ module GeneValidator
|
|
69
69
|
def assert_output_dir_does_not_exist
|
70
70
|
output_dir = "#{@opt[:input_fasta_file]}.html"
|
71
71
|
return unless File.exist?(output_dir)
|
72
|
-
$stderr.puts
|
73
|
-
$stderr.puts "
|
72
|
+
$stderr.puts 'The output directory already exists for this fasta file.'
|
73
|
+
$stderr.puts "\nPlease remove the following directory: #{output_dir}\n"
|
74
74
|
$stderr.puts "You can run the following command to remove the folder.\n"
|
75
75
|
$stderr.puts "\n $ rm -r #{output_dir} \n"
|
76
76
|
exit 1
|
@@ -102,14 +102,16 @@ module GeneValidator
|
|
102
102
|
fasta_content = IO.binread(@opt[:input_fasta_file])
|
103
103
|
type = BlastUtils.type_of_sequences(fasta_content)
|
104
104
|
return if type == :nucleotide || type == :protein
|
105
|
-
$stderr.puts '*** Error: The input files does not contain just protein
|
106
|
-
$stderr.puts ' nucleotide data.
|
105
|
+
$stderr.puts '*** Error: The input files does not contain just protein'
|
106
|
+
$stderr.puts ' or nucleotide data.'
|
107
|
+
$stderr.puts ' Please correct this and try again.'
|
107
108
|
exit 1
|
108
109
|
end
|
109
110
|
|
110
111
|
def export_bin_dirs
|
111
112
|
@opt[:bin].each do |bin|
|
112
|
-
|
113
|
+
bin = File.expand_path(bin)
|
114
|
+
if File.exist?(bin) && File.directory?(bin)
|
113
115
|
add_to_path(bin)
|
114
116
|
else
|
115
117
|
$stderr.puts '*** The following bin directory does not exist:'
|
@@ -120,6 +122,7 @@ module GeneValidator
|
|
120
122
|
|
121
123
|
## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
|
122
124
|
def add_to_path(bin_dir)
|
125
|
+
return unless bin_dir
|
123
126
|
return if ENV['PATH'].split(':').include?(bin_dir)
|
124
127
|
ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
|
125
128
|
end
|
@@ -157,12 +160,12 @@ module GeneValidator
|
|
157
160
|
|
158
161
|
def warn_if_remote_database(db)
|
159
162
|
return if db !~ /remote/
|
160
|
-
$stderr.puts #
|
163
|
+
$stderr.puts # a blank line
|
161
164
|
$stderr.puts 'Warning: BLAST will be carried out on remote servers.'
|
162
165
|
$stderr.puts 'This may take quite a bit of time.'
|
163
166
|
$stderr.puts 'You may want to install a local BLAST database for' \
|
164
167
|
' faster analyses.'
|
165
|
-
$stderr.puts #
|
168
|
+
$stderr.puts # a blank line
|
166
169
|
end
|
167
170
|
|
168
171
|
def assert_local_blast_database_exists(db)
|
data/lib/genevalidator/blast.rb
CHANGED
@@ -4,8 +4,8 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
require 'genevalidator/exceptions'
|
6
6
|
require 'genevalidator/hsp'
|
7
|
-
require 'genevalidator/sequences'
|
8
7
|
require 'genevalidator/output'
|
8
|
+
require 'genevalidator/query'
|
9
9
|
|
10
10
|
module GeneValidator
|
11
11
|
# Contains methods that run BLAST and methods that analyse sequences
|
@@ -36,7 +36,7 @@ module GeneValidator
|
|
36
36
|
" #{threads}"
|
37
37
|
|
38
38
|
cmd = "echo \"#{query}\" | #{blastcmd}"
|
39
|
-
`#{cmd}
|
39
|
+
`#{cmd} >/dev/null 2>&1`
|
40
40
|
end
|
41
41
|
|
42
42
|
##
|
@@ -54,7 +54,7 @@ module GeneValidator
|
|
54
54
|
num_threads = opt[:num_threads])
|
55
55
|
return if opt[:blast_xml_file] || opt[:blast_tabular_file]
|
56
56
|
|
57
|
-
$stderr.puts 'Running BLAST'
|
57
|
+
$stderr.puts 'Running BLAST. This may take a while.'
|
58
58
|
opt[:blast_xml_file] = input_file + '.blast_xml'
|
59
59
|
|
60
60
|
blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
|
@@ -65,11 +65,16 @@ module GeneValidator
|
|
65
65
|
" -out '#{opt[:blast_xml_file]}' -db #{db} " \
|
66
66
|
" -evalue #{EVALUE} -outfmt 5 #{threads}"
|
67
67
|
|
68
|
-
`#{blastcmd}`
|
68
|
+
`#{blastcmd} >/dev/null 2>&1`
|
69
69
|
return unless File.zero?(opt[:blast_xml_file])
|
70
|
-
$stderr.puts 'Blast failed to run on the input file.
|
71
|
-
|
72
|
-
|
70
|
+
$stderr.puts 'Blast failed to run on the input file.'
|
71
|
+
if opt[:db] !~ /remote/
|
72
|
+
$stderr.puts 'Please ensure that the BLAST database exists and try'
|
73
|
+
$stderr.puts 'again.'
|
74
|
+
else
|
75
|
+
$stderr.puts 'You are using BLAST with a remote database. Please'
|
76
|
+
$stderr.puts 'ensure that you have internet access and try again.'
|
77
|
+
end
|
73
78
|
end
|
74
79
|
|
75
80
|
##
|
@@ -86,7 +91,7 @@ module GeneValidator
|
|
86
91
|
# parse the BLAST XML output and get the hits
|
87
92
|
# hits obtained are proteins! (we use only blastp and blastx)
|
88
93
|
iter.each do |hit|
|
89
|
-
seq =
|
94
|
+
seq = Query.new
|
90
95
|
|
91
96
|
seq.length_protein = hit.len.to_i
|
92
97
|
seq.type = :protein
|
@@ -126,7 +131,8 @@ module GeneValidator
|
|
126
131
|
end
|
127
132
|
current_hsp.align_len = hsp.align_len.to_i
|
128
133
|
current_hsp.identity = hsp.identity.to_i
|
129
|
-
current_hsp.pidentity = (100 * hsp.identity /
|
134
|
+
current_hsp.pidentity = (100 * hsp.identity / hsp.align_len.to_f)
|
135
|
+
.round(2)
|
130
136
|
|
131
137
|
hsps.push(current_hsp)
|
132
138
|
end
|
@@ -158,10 +164,11 @@ module GeneValidator
|
|
158
164
|
# the first sequence does not need to have a fasta definition line
|
159
165
|
sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
|
160
166
|
# get all sequence types
|
161
|
-
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
167
|
+
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
168
|
+
.uniq.compact
|
162
169
|
|
163
170
|
return nil if sequence_types.empty?
|
164
|
-
|
171
|
+
sequence_types.first if sequence_types.length == 1
|
165
172
|
end
|
166
173
|
|
167
174
|
##
|
@@ -96,7 +96,8 @@ module GeneValidator
|
|
96
96
|
d
|
97
97
|
end
|
98
98
|
|
99
|
-
# Returns the euclidian distance between the current cluster and the one
|
99
|
+
# Returns the Euclidean distance between the current cluster and the one
|
100
|
+
# given as parameter
|
100
101
|
# Params:
|
101
102
|
# +cluster+: Cluster object
|
102
103
|
# +method+: 0 or 1
|
@@ -126,7 +127,7 @@ module GeneValidator
|
|
126
127
|
# Returns within cluster sum of squares
|
127
128
|
def wss(objects = nil)
|
128
129
|
if objects.nil?
|
129
|
-
objects = @objects.map { |x|
|
130
|
+
objects = @objects.map { |x| Array.new(x[1], x[0]) }.flatten
|
130
131
|
end
|
131
132
|
|
132
133
|
cluster_mean = mean
|
@@ -181,7 +182,8 @@ module GeneValidator
|
|
181
182
|
d
|
182
183
|
end
|
183
184
|
|
184
|
-
# Returns the euclidian distance between the current cluster and the one
|
185
|
+
# Returns the Euclidean distance between the current cluster and the one
|
186
|
+
# given as parameter
|
185
187
|
# Params:
|
186
188
|
# +cluster+: Cluster object
|
187
189
|
# +method+: 0 or 1
|
@@ -212,7 +214,7 @@ module GeneValidator
|
|
212
214
|
# Returns within cluster sum of squares
|
213
215
|
def wss(lengths = nil)
|
214
216
|
if lengths.nil?
|
215
|
-
lengths = @lengths.map { |x|
|
217
|
+
lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
|
216
218
|
end
|
217
219
|
|
218
220
|
cluster_mean = mean
|
@@ -226,12 +228,13 @@ module GeneValidator
|
|
226
228
|
##
|
227
229
|
# Returns the standard deviation of a set of values
|
228
230
|
# Params:
|
229
|
-
# +lengths+: a vector of values (optional, by default it takes the values
|
231
|
+
# +lengths+: a vector of values (optional, by default it takes the values
|
232
|
+
# in the cluster)
|
230
233
|
# Output:
|
231
234
|
# Real number
|
232
235
|
def standard_deviation(lengths = nil)
|
233
236
|
if lengths.nil?
|
234
|
-
lengths = @lengths.map { |x|
|
237
|
+
lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
|
235
238
|
end
|
236
239
|
|
237
240
|
cluster_mean = mean
|
@@ -250,8 +253,8 @@ module GeneValidator
|
|
250
253
|
# Output:
|
251
254
|
# Real number
|
252
255
|
def deviation(clusters, queryLength)
|
253
|
-
hits = clusters.map { |c| c.lengths.map { |x|
|
254
|
-
raw_hits = clusters.map { |c| c.lengths.map { |x|
|
256
|
+
hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten
|
257
|
+
raw_hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten.to_s.gsub('[', '').gsub(']', '')
|
255
258
|
R.eval("sd = sd(c(#{raw_hits}))")
|
256
259
|
sd = R.pull('sd')
|
257
260
|
sd = standard_deviation(hits)
|
@@ -313,7 +316,8 @@ module GeneValidator
|
|
313
316
|
@clusters = []
|
314
317
|
end
|
315
318
|
|
316
|
-
def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0,
|
319
|
+
def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0,
|
320
|
+
vec = @values, debug = false)
|
317
321
|
clusters = []
|
318
322
|
|
319
323
|
if vec.length == 1
|
@@ -332,11 +336,9 @@ module GeneValidator
|
|
332
336
|
|
333
337
|
# clusters = array of clusters
|
334
338
|
# initially each length belongs to a different cluster
|
335
|
-
histogram.each do |
|
336
|
-
if debug
|
337
|
-
|
338
|
-
end
|
339
|
-
hash = { elem[0] => elem[1] }
|
339
|
+
histogram.each do |e|
|
340
|
+
$stderr.puts "pair (#{e[0].x} #{e[0].y}) appears #{e[1]} times" if debug
|
341
|
+
hash = { e[0] => e[1] }
|
340
342
|
cluster = PairCluster.new(hash)
|
341
343
|
clusters.push(cluster)
|
342
344
|
end
|
@@ -352,7 +354,7 @@ module GeneValidator
|
|
352
354
|
# stop condition 1
|
353
355
|
break if no_clusters != 0 && clusters.length == no_clusters
|
354
356
|
|
355
|
-
iteration
|
357
|
+
iteration += iteration
|
356
358
|
$stderr.puts "\nIteration #{iteration}" if debug
|
357
359
|
|
358
360
|
min_distance = 100_000_000
|
@@ -363,17 +365,19 @@ module GeneValidator
|
|
363
365
|
[*(0..(clusters.length - 2))].each do |i|
|
364
366
|
[*((i + 1)..(clusters.length - 1))].each do |j|
|
365
367
|
dist = clusters[i].distance(clusters[j], distance_method)
|
366
|
-
|
368
|
+
if debug
|
369
|
+
$stderr.puts "distance between clusters #{i} and #{j} is #{dist}"
|
370
|
+
end
|
367
371
|
current_density = clusters[i].density + clusters[j].density
|
368
372
|
if dist < min_distance
|
369
373
|
min_distance = dist
|
370
|
-
cluster1
|
371
|
-
cluster2
|
372
|
-
density
|
374
|
+
cluster1 = i
|
375
|
+
cluster2 = j
|
376
|
+
density = current_density
|
373
377
|
elsif dist == min_distance && density < current_density
|
374
378
|
cluster1 = i
|
375
379
|
cluster2 = j
|
376
|
-
density
|
380
|
+
density = current_density
|
377
381
|
end
|
378
382
|
end
|
379
383
|
end
|
@@ -402,8 +406,8 @@ module GeneValidator
|
|
402
406
|
end
|
403
407
|
|
404
408
|
##
|
405
|
-
# Makes an hierarchical clusterization until the most dense cluster is
|
406
|
-
# or the distance between clusters is sufficintly big
|
409
|
+
# Makes a hierarchical clusterization until the most dense cluster is
|
410
|
+
# obtained or the distance between clusters is sufficiently big
|
407
411
|
# or the desired number of clusters is obtained
|
408
412
|
# Params:
|
409
413
|
# +no_clusters+: stop test (number of clusters)
|
@@ -412,12 +416,13 @@ module GeneValidator
|
|
412
416
|
# +debug+: display debug information
|
413
417
|
# Output:
|
414
418
|
# vector of +Cluster+ objects
|
415
|
-
def hierarchical_clusterization(no_clusters = 0, distance_method = 0,
|
419
|
+
def hierarchical_clusterization(no_clusters = 0, distance_method = 0,
|
420
|
+
vec = @values, debug = false)
|
416
421
|
clusters = []
|
417
422
|
vec = vec.sort
|
418
423
|
|
419
424
|
if vec.length == 1
|
420
|
-
hash
|
425
|
+
hash = { vec[0] => 1 }
|
421
426
|
cluster = Cluster.new(hash)
|
422
427
|
clusters.push(cluster)
|
423
428
|
clusters
|
@@ -425,7 +430,7 @@ module GeneValidator
|
|
425
430
|
|
426
431
|
# Thresholds
|
427
432
|
threshold_distance = (0.25 * (vec.max - vec.min))
|
428
|
-
threshold_density
|
433
|
+
threshold_density = (0.5 * vec.length).to_i
|
429
434
|
|
430
435
|
# make a histogram from the input vector
|
431
436
|
histogram = Hash[vec.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
|
@@ -447,11 +452,10 @@ module GeneValidator
|
|
447
452
|
# the loop stops according to the stop conditions
|
448
453
|
iteration = 0
|
449
454
|
loop do
|
450
|
-
|
451
455
|
# stop condition 1
|
452
456
|
break if no_clusters != 0 && clusters.length == no_clusters
|
453
457
|
|
454
|
-
iteration
|
458
|
+
iteration += iteration
|
455
459
|
$stderr.puts "\nIteration #{iteration}" if debug
|
456
460
|
|
457
461
|
min_distance = 100_000_000
|
@@ -460,7 +464,9 @@ module GeneValidator
|
|
460
464
|
|
461
465
|
clusters[0..clusters.length - 2].each_with_index do |_item, i|
|
462
466
|
dist = clusters[i].distance(clusters[i + 1], distance_method)
|
463
|
-
|
467
|
+
if debug
|
468
|
+
$stderr.puts "distance btwn clusters #{i} and #{i + 1} is #{dist}"
|
469
|
+
end
|
464
470
|
current_density = clusters[i].density + clusters[i + 1].density
|
465
471
|
if dist < min_distance
|
466
472
|
min_distance = dist
|
@@ -509,9 +515,7 @@ module GeneValidator
|
|
509
515
|
max_density = 0
|
510
516
|
max_density_cluster = 0
|
511
517
|
|
512
|
-
if clusters.nil?
|
513
|
-
nil
|
514
|
-
end
|
518
|
+
nil if clusters.nil?
|
515
519
|
|
516
520
|
clusters.each_with_index do |item, i|
|
517
521
|
if item.density > max_density
|