genevalidator 1.6.2 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +147 -76
- data/Rakefile +1 -1
- data/aux/files/css/genevalidator.compiled.min.css +16 -0
- data/aux/files/css/{bootstrap.min.css → src/bootstrap.min.css} +0 -0
- data/aux/files/css/{font-awesome.min.css → src/font-awesome.min.css} +0 -0
- data/aux/files/css/{style.css → src/style.css} +0 -0
- data/aux/files/js/genevalidator.compiled.min.js +28 -0
- data/aux/files/js/{bootstrap.min.js → src/bootstrap.min.js} +0 -0
- data/aux/files/js/{d3.v3.min.js → src/d3.v3.min.js} +0 -0
- data/aux/files/js/{jquery-2.1.1.min.js → src/jquery-2.1.1.min.js} +0 -0
- data/aux/files/js/{jquery.tablesorter.min.js → src/jquery.tablesorter.min.js} +0 -0
- data/aux/files/js/src/plots.js +814 -0
- data/aux/files/js/src/script.js +43 -0
- data/aux/json_header.erb +6 -6
- data/aux/json_query.erb +2 -1
- data/aux/template_footer.erb +0 -11
- data/aux/template_header.erb +4 -4
- data/aux/template_query.erb +1 -1
- data/bin/genevalidator +8 -6
- data/genevalidator.gemspec +1 -1
- data/lib/genevalidator.rb +7 -5
- data/lib/genevalidator/arg_validation.rb +12 -9
- data/lib/genevalidator/blast.rb +18 -11
- data/lib/genevalidator/clusterization.rb +35 -31
- data/lib/genevalidator/exceptions.rb +0 -1
- data/lib/genevalidator/get_raw_sequences.rb +115 -69
- data/lib/genevalidator/hsp.rb +8 -8
- data/lib/genevalidator/json_to_gv_results.rb +4 -4
- data/lib/genevalidator/output.rb +40 -41
- data/lib/genevalidator/pool.rb +5 -4
- data/lib/genevalidator/query.rb +37 -0
- data/lib/genevalidator/tabular_parser.rb +3 -4
- data/lib/genevalidator/validation.rb +16 -11
- data/lib/genevalidator/validation_alignment.rb +17 -23
- data/lib/genevalidator/validation_blast_reading_frame.rb +3 -3
- data/lib/genevalidator/validation_duplication.rb +8 -18
- data/lib/genevalidator/validation_gene_merge.rb +11 -9
- data/lib/genevalidator/validation_length_cluster.rb +8 -11
- data/lib/genevalidator/validation_length_rank.rb +5 -4
- data/lib/genevalidator/validation_open_reading_frame.rb +5 -5
- data/lib/genevalidator/version.rb +1 -1
- data/test/test_all_validations.rb +2 -1
- data/test/test_blast.rb +4 -3
- data/test/test_extended_array_methods.rb +2 -1
- data/test/{test_sequences.rb → test_query.rb} +5 -23
- data/test/test_validation_open_reading_frame.rb +7 -7
- data/test/test_validations.rb +8 -6
- metadata +16 -16
- data/aux/app_template_footer.erb +0 -1
- data/aux/app_template_header.erb +0 -12
- data/aux/files/js/plots.js +0 -828
- data/aux/files/js/script.js +0 -71
- data/lib/genevalidator/sequences.rb +0 -101
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
$(document).ready(function() {
|
|
2
|
+
if (window.chrome && (window.location.protocol === 'file:')){
|
|
3
|
+
$('#mainbrowseralertText').text("Some features are not supported in this browser and have been disabled.");
|
|
4
|
+
$('#browseralert').modal();
|
|
5
|
+
}
|
|
6
|
+
GV.toggleOverviewBtn(); // write overview to overview section
|
|
7
|
+
initTableSorter();
|
|
8
|
+
$("[data-toggle='tooltip']").tooltip(); //ToolTip
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
$(document).on( "click", "td, .plot_btn", function( event ) {
|
|
12
|
+
if ($(this).hasClass('success') || $(this).hasClass('danger')){
|
|
13
|
+
var title = $(this).attr('title');
|
|
14
|
+
var val = title.replace(/[ \/]/g, '');
|
|
15
|
+
GV.addData(this, val);
|
|
16
|
+
} else if ($(this).hasClass('plot_btn')){
|
|
17
|
+
GV.addData(this, 'all');
|
|
18
|
+
}
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
function initTableSorter() {
|
|
22
|
+
$.tablesorter.addParser({
|
|
23
|
+
id: 'star_scores', // called later when init the tablesorter
|
|
24
|
+
is: function() {
|
|
25
|
+
return false; // return false so this parser is not auto detected
|
|
26
|
+
},
|
|
27
|
+
format: function(s, table, cell, cellIndex) {
|
|
28
|
+
var $cell = $(cell);
|
|
29
|
+
if (cellIndex === 1) {
|
|
30
|
+
return $cell.attr('data-score') || s;
|
|
31
|
+
}
|
|
32
|
+
return s;
|
|
33
|
+
},
|
|
34
|
+
parsed: false,
|
|
35
|
+
type: 'numeric' // Setting type of data...
|
|
36
|
+
});
|
|
37
|
+
$('table').tablesorter({
|
|
38
|
+
headers: {
|
|
39
|
+
1 : { sorter: 'star_scores' } // Telling it to use custom parser...
|
|
40
|
+
},
|
|
41
|
+
sortList: [[0,0]],
|
|
42
|
+
});
|
|
43
|
+
}
|
data/aux/json_header.erb
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/
|
|
1
|
+
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
|
|
2
2
|
<body>
|
|
3
3
|
<div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
|
|
4
4
|
<div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
|
|
5
|
-
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">
|
|
6
|
-
<div class="container"><span class="menu_icon pull-right"><a href="#"
|
|
7
|
-
<div id="
|
|
5
|
+
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
|
|
6
|
+
<div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
|
|
7
|
+
<div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
|
|
8
8
|
<table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
|
|
9
9
|
<tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition <span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits <span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
|
|
10
10
|
<% @json_array[0]['validations'].each do |_short_header, item| %>
|
|
11
|
-
<th class="sorter-false"><b><%= item['header']
|
|
12
|
-
<% if item['header'] == "
|
|
11
|
+
<th class="sorter-false"><b><%= item['header'] %> </b>
|
|
12
|
+
<% if item['header'] == "Length Cluster" || item['header'] == "Gene Merge" || item['header'] == "Main ORF" || item['header'] == "Missing/Extra Sequences" %>
|
|
13
13
|
<span data-toggle="tooltip" title="Charts available for this validation" data-placement="top"><i class="fa fa-bar-chart-o chartIcon"></i></span> <span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
|
|
14
14
|
<% else %>
|
|
15
15
|
<span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
|
data/aux/json_query.erb
CHANGED
|
@@ -4,7 +4,8 @@
|
|
|
4
4
|
<td title="Definition"><%= @row['definition'] %></td>
|
|
5
5
|
<td title="No. Hits"><%= @row['no_hits'] %></td>
|
|
6
6
|
<% @row['validations'].each do |_short_header, item| %>
|
|
7
|
-
<td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print']
|
|
7
|
+
<td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print'].gsub(' ', ' ').gsub('; ', '; ')
|
|
8
|
+
%></td>
|
|
8
9
|
<% end %>
|
|
9
10
|
<% if @row['validations'].select{|_short_header, item| item['graphs'] != nil}.map{|_short_header, item| item['graphs'].length}.inject(0){|r, e| r+e } != 0 %>
|
|
10
11
|
<td><button title="Show plots" class="plot_btn btn btn-default"><i class="fa fa-bar-chart-o"></i></button></td>
|
data/aux/template_footer.erb
CHANGED
|
@@ -1,15 +1,4 @@
|
|
|
1
1
|
</tbody></table>
|
|
2
|
-
<script>
|
|
3
|
-
var evaluation_div = document.getElementById('report_1');
|
|
4
|
-
var content_less = "<br><%= less %><div class=\"clearfix\"></div><br><button type=\"button\" onClick='add_content()' class=\"btn btn-primary btn-sm\"> Show More</button>";
|
|
5
|
-
var content = "<%= evaluation %> <div class=\"clearfix\"></div><br><button type=\"button\" onClick='evaluation_div.innerHTML=content_less' class=\"btn btn-primary btn-sm\"> Show Less</button>";
|
|
6
|
-
evaluation_div.innerHTML = content_less;
|
|
7
|
-
function add_content(){
|
|
8
|
-
evaluation_div = document.getElementById('report_1');
|
|
9
|
-
evaluation_div.innerHTML=content;
|
|
10
|
-
addOverallPlot('files/json/overview.json');
|
|
11
|
-
}
|
|
12
|
-
</script>
|
|
13
2
|
</div>
|
|
14
3
|
<% if output_files.length > 1 %>
|
|
15
4
|
<nav><ul class="pagination">
|
data/aux/template_header.erb
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/
|
|
1
|
+
<!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
|
|
2
2
|
<body>
|
|
3
3
|
<div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
|
|
4
4
|
<div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
|
|
5
|
-
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">
|
|
6
|
-
<div class="container"><span class="menu_icon pull-right"><a href="#"
|
|
7
|
-
<div id="
|
|
5
|
+
<div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">×</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
|
|
6
|
+
<div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
|
|
7
|
+
<div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
|
|
8
8
|
<table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
|
|
9
9
|
<tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition <span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits <span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
|
|
10
10
|
<% @validations.each do |item| %>
|
data/aux/template_query.erb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<tr data-target="toggle<%= @idx %>" data-jsonFile="files/json/<%= @config[:filename] %>_<%= @idx %>.json">
|
|
2
2
|
<td title="idx"><%= @idx %></td>
|
|
3
|
-
<td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score
|
|
3
|
+
<td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score %>%;"></div></div></td>
|
|
4
4
|
<td title="Definition"><%= @prediction_def %></td>
|
|
5
5
|
<td title="No. Hits"><%= @nr_hits %></td>
|
|
6
6
|
<% @validations.each do |item| %>
|
data/bin/genevalidator
CHANGED
|
@@ -11,7 +11,7 @@ opt_parser = OptionParser.new do |opts|
|
|
|
11
11
|
opts.banner = <<BANNER
|
|
12
12
|
|
|
13
13
|
USAGE:
|
|
14
|
-
|
|
14
|
+
genevalidator [OPTIONS] Input_File
|
|
15
15
|
|
|
16
16
|
ARGUMENTS:
|
|
17
17
|
Input_File: Path to the input fasta file containing the predicted sequences.
|
|
@@ -38,7 +38,7 @@ BANNER
|
|
|
38
38
|
opts.on('-d', '--db [BLAST_DATABASE]',
|
|
39
39
|
'Path to the BLAST database',
|
|
40
40
|
'GeneValidator also supports remote databases:',
|
|
41
|
-
'e.g.
|
|
41
|
+
'e.g. genevalidator -d "swissprot -remote" Input_File') do |db|
|
|
42
42
|
opt[:db] = db
|
|
43
43
|
end
|
|
44
44
|
|
|
@@ -50,8 +50,8 @@ BANNER
|
|
|
50
50
|
end
|
|
51
51
|
|
|
52
52
|
opts.on('-j', '--json_file [JSON_FILE]',
|
|
53
|
-
'Generate
|
|
54
|
-
'produced
|
|
53
|
+
'Generate HTML report from a JSON file (or a subset of a JSON file)',
|
|
54
|
+
'produced by GeneValidator') do |json|
|
|
55
55
|
opt[:json_file] = json
|
|
56
56
|
end
|
|
57
57
|
|
|
@@ -80,7 +80,7 @@ BANNER
|
|
|
80
80
|
'BLAST and Mafft within GeneValidator.') do |num_threads|
|
|
81
81
|
opt[:num_threads] = num_threads
|
|
82
82
|
end
|
|
83
|
-
|
|
83
|
+
|
|
84
84
|
opts.on('-r', '--raw_sequences [raw_seq]',
|
|
85
85
|
'Supply a fasta file of the raw sequences of all BLAST hits present',
|
|
86
86
|
'in the supplied BLAST XML or BLAST tabular file.') do |raw_seq|
|
|
@@ -90,7 +90,7 @@ BANNER
|
|
|
90
90
|
opts.on('-b', '--binaries [binaries]', Array,
|
|
91
91
|
'Path to BLAST and MAFFT bin folders (is added to $PATH variable)',
|
|
92
92
|
'To be provided as follows:',
|
|
93
|
-
'
|
|
93
|
+
'e.g. genevalidator -b /blast/bin/path/ -b /mafft/bin/path/') do |bin|
|
|
94
94
|
(opt[:bin] ||= []).concat(bin)
|
|
95
95
|
end
|
|
96
96
|
|
|
@@ -127,9 +127,11 @@ start = Time.now
|
|
|
127
127
|
if opt[:extract_raw_seqs] && opt[:raw_sequences].nil?
|
|
128
128
|
GeneValidator.opt = opt
|
|
129
129
|
GeneValidator.config = {}
|
|
130
|
+
# GVArgValidation.run_raw_seqs
|
|
130
131
|
GeneValidator::RawSequences.run
|
|
131
132
|
elsif opt[:json_file]
|
|
132
133
|
GeneValidator.opt = opt
|
|
134
|
+
# GVArgValidation.run_json_to_gv_results
|
|
133
135
|
GeneValidator::JsonToGVResults.run
|
|
134
136
|
else
|
|
135
137
|
GeneValidator.init(opt)
|
data/genevalidator.gemspec
CHANGED
data/lib/genevalidator.rb
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
require 'fileutils'
|
|
2
|
-
|
|
3
2
|
require 'bio-blastxmlparser'
|
|
4
3
|
|
|
5
4
|
require 'genevalidator/arg_validation'
|
|
@@ -68,16 +67,19 @@ module GeneValidator
|
|
|
68
67
|
# Parse the blast output and run validations
|
|
69
68
|
def run
|
|
70
69
|
# Run BLAST on all sequences (generates @opt[:blast_xml_file])
|
|
71
|
-
#
|
|
70
|
+
# if no BLAST OUTPUT file provided...
|
|
72
71
|
unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
|
|
73
72
|
BlastUtils.run_blast_on_input_file
|
|
74
73
|
end
|
|
75
|
-
# Obtain fasta file of all BLAST hits
|
|
76
|
-
|
|
74
|
+
# Obtain fasta file of all BLAST hits if running align or dup validations
|
|
75
|
+
if @opt[:validations].include?('align') ||
|
|
76
|
+
@opt[:validations].include?('dup')
|
|
77
|
+
RawSequences.run unless @opt[:raw_sequences]
|
|
78
|
+
end
|
|
77
79
|
# Run Validations
|
|
78
80
|
iterator = parse_blast_output_file
|
|
79
81
|
(Validations.new).run_validations(iterator)
|
|
80
|
-
|
|
82
|
+
|
|
81
83
|
Output.write_json_file(@config[:json_output], @config[:json_file])
|
|
82
84
|
Output.print_footer(@overview, @config)
|
|
83
85
|
end
|
|
@@ -26,7 +26,6 @@ module GeneValidator
|
|
|
26
26
|
check_num_threads
|
|
27
27
|
|
|
28
28
|
export_bin_dirs unless @opt[:bin].nil?
|
|
29
|
-
|
|
30
29
|
Blast.validate(opt) unless @opt[:test]
|
|
31
30
|
assert_mafft_installation
|
|
32
31
|
end
|
|
@@ -53,7 +52,8 @@ module GeneValidator
|
|
|
53
52
|
$stderr.puts 'Number of threads can not be lower than 0'
|
|
54
53
|
end
|
|
55
54
|
return unless @opt[:num_threads] > 256
|
|
56
|
-
$stderr.puts "Number of threads set at #{@opt[:num_threads]} is
|
|
55
|
+
$stderr.puts "Number of threads set at #{@opt[:num_threads]} is" \
|
|
56
|
+
' unusually high.'
|
|
57
57
|
end
|
|
58
58
|
|
|
59
59
|
def assert_BLAST_output_files
|
|
@@ -69,8 +69,8 @@ module GeneValidator
|
|
|
69
69
|
def assert_output_dir_does_not_exist
|
|
70
70
|
output_dir = "#{@opt[:input_fasta_file]}.html"
|
|
71
71
|
return unless File.exist?(output_dir)
|
|
72
|
-
$stderr.puts
|
|
73
|
-
$stderr.puts "
|
|
72
|
+
$stderr.puts 'The output directory already exists for this fasta file.'
|
|
73
|
+
$stderr.puts "\nPlease remove the following directory: #{output_dir}\n"
|
|
74
74
|
$stderr.puts "You can run the following command to remove the folder.\n"
|
|
75
75
|
$stderr.puts "\n $ rm -r #{output_dir} \n"
|
|
76
76
|
exit 1
|
|
@@ -102,14 +102,16 @@ module GeneValidator
|
|
|
102
102
|
fasta_content = IO.binread(@opt[:input_fasta_file])
|
|
103
103
|
type = BlastUtils.type_of_sequences(fasta_content)
|
|
104
104
|
return if type == :nucleotide || type == :protein
|
|
105
|
-
$stderr.puts '*** Error: The input files does not contain just protein
|
|
106
|
-
$stderr.puts ' nucleotide data.
|
|
105
|
+
$stderr.puts '*** Error: The input files does not contain just protein'
|
|
106
|
+
$stderr.puts ' or nucleotide data.'
|
|
107
|
+
$stderr.puts ' Please correct this and try again.'
|
|
107
108
|
exit 1
|
|
108
109
|
end
|
|
109
110
|
|
|
110
111
|
def export_bin_dirs
|
|
111
112
|
@opt[:bin].each do |bin|
|
|
112
|
-
|
|
113
|
+
bin = File.expand_path(bin)
|
|
114
|
+
if File.exist?(bin) && File.directory?(bin)
|
|
113
115
|
add_to_path(bin)
|
|
114
116
|
else
|
|
115
117
|
$stderr.puts '*** The following bin directory does not exist:'
|
|
@@ -120,6 +122,7 @@ module GeneValidator
|
|
|
120
122
|
|
|
121
123
|
## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
|
|
122
124
|
def add_to_path(bin_dir)
|
|
125
|
+
return unless bin_dir
|
|
123
126
|
return if ENV['PATH'].split(':').include?(bin_dir)
|
|
124
127
|
ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
|
|
125
128
|
end
|
|
@@ -157,12 +160,12 @@ module GeneValidator
|
|
|
157
160
|
|
|
158
161
|
def warn_if_remote_database(db)
|
|
159
162
|
return if db !~ /remote/
|
|
160
|
-
$stderr.puts #
|
|
163
|
+
$stderr.puts # a blank line
|
|
161
164
|
$stderr.puts 'Warning: BLAST will be carried out on remote servers.'
|
|
162
165
|
$stderr.puts 'This may take quite a bit of time.'
|
|
163
166
|
$stderr.puts 'You may want to install a local BLAST database for' \
|
|
164
167
|
' faster analyses.'
|
|
165
|
-
$stderr.puts #
|
|
168
|
+
$stderr.puts # a blank line
|
|
166
169
|
end
|
|
167
170
|
|
|
168
171
|
def assert_local_blast_database_exists(db)
|
data/lib/genevalidator/blast.rb
CHANGED
|
@@ -4,8 +4,8 @@ require 'forwardable'
|
|
|
4
4
|
|
|
5
5
|
require 'genevalidator/exceptions'
|
|
6
6
|
require 'genevalidator/hsp'
|
|
7
|
-
require 'genevalidator/sequences'
|
|
8
7
|
require 'genevalidator/output'
|
|
8
|
+
require 'genevalidator/query'
|
|
9
9
|
|
|
10
10
|
module GeneValidator
|
|
11
11
|
# Contains methods that run BLAST and methods that analyse sequences
|
|
@@ -36,7 +36,7 @@ module GeneValidator
|
|
|
36
36
|
" #{threads}"
|
|
37
37
|
|
|
38
38
|
cmd = "echo \"#{query}\" | #{blastcmd}"
|
|
39
|
-
`#{cmd}
|
|
39
|
+
`#{cmd} >/dev/null 2>&1`
|
|
40
40
|
end
|
|
41
41
|
|
|
42
42
|
##
|
|
@@ -54,7 +54,7 @@ module GeneValidator
|
|
|
54
54
|
num_threads = opt[:num_threads])
|
|
55
55
|
return if opt[:blast_xml_file] || opt[:blast_tabular_file]
|
|
56
56
|
|
|
57
|
-
$stderr.puts 'Running BLAST'
|
|
57
|
+
$stderr.puts 'Running BLAST. This may take a while.'
|
|
58
58
|
opt[:blast_xml_file] = input_file + '.blast_xml'
|
|
59
59
|
|
|
60
60
|
blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
|
|
@@ -65,11 +65,16 @@ module GeneValidator
|
|
|
65
65
|
" -out '#{opt[:blast_xml_file]}' -db #{db} " \
|
|
66
66
|
" -evalue #{EVALUE} -outfmt 5 #{threads}"
|
|
67
67
|
|
|
68
|
-
`#{blastcmd}`
|
|
68
|
+
`#{blastcmd} >/dev/null 2>&1`
|
|
69
69
|
return unless File.zero?(opt[:blast_xml_file])
|
|
70
|
-
$stderr.puts 'Blast failed to run on the input file.
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
$stderr.puts 'Blast failed to run on the input file.'
|
|
71
|
+
if opt[:db] !~ /remote/
|
|
72
|
+
$stderr.puts 'Please ensure that the BLAST database exists and try'
|
|
73
|
+
$stderr.puts 'again.'
|
|
74
|
+
else
|
|
75
|
+
$stderr.puts 'You are using BLAST with a remote database. Please'
|
|
76
|
+
$stderr.puts 'ensure that you have internet access and try again.'
|
|
77
|
+
end
|
|
73
78
|
end
|
|
74
79
|
|
|
75
80
|
##
|
|
@@ -86,7 +91,7 @@ module GeneValidator
|
|
|
86
91
|
# parse blast the xml output and get the hits
|
|
87
92
|
# hits obtained are proteins! (we use only blastp and blastx)
|
|
88
93
|
iter.each do |hit|
|
|
89
|
-
seq =
|
|
94
|
+
seq = Query.new
|
|
90
95
|
|
|
91
96
|
seq.length_protein = hit.len.to_i
|
|
92
97
|
seq.type = :protein
|
|
@@ -126,7 +131,8 @@ module GeneValidator
|
|
|
126
131
|
end
|
|
127
132
|
current_hsp.align_len = hsp.align_len.to_i
|
|
128
133
|
current_hsp.identity = hsp.identity.to_i
|
|
129
|
-
current_hsp.pidentity = (100 * hsp.identity /
|
|
134
|
+
current_hsp.pidentity = (100 * hsp.identity / hsp.align_len.to_f)
|
|
135
|
+
.round(2)
|
|
130
136
|
|
|
131
137
|
hsps.push(current_hsp)
|
|
132
138
|
end
|
|
@@ -158,10 +164,11 @@ module GeneValidator
|
|
|
158
164
|
# the first sequence does not need to have a fasta definition line
|
|
159
165
|
sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
|
|
160
166
|
# get all sequence types
|
|
161
|
-
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
|
167
|
+
sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
|
|
168
|
+
.uniq.compact
|
|
162
169
|
|
|
163
170
|
return nil if sequence_types.empty?
|
|
164
|
-
|
|
171
|
+
sequence_types.first if sequence_types.length == 1
|
|
165
172
|
end
|
|
166
173
|
|
|
167
174
|
##
|
|
@@ -96,7 +96,8 @@ module GeneValidator
|
|
|
96
96
|
d
|
|
97
97
|
end
|
|
98
98
|
|
|
99
|
-
# Returns the euclidian distance between the current cluster and the one
|
|
99
|
+
# Returns the euclidian distance between the current cluster and the one
|
|
100
|
+
# given as parameter
|
|
100
101
|
# Params:
|
|
101
102
|
# +cluster+: Cluster object
|
|
102
103
|
# +method+: 0 or 1
|
|
@@ -126,7 +127,7 @@ module GeneValidator
|
|
|
126
127
|
# Returns within cluster sum of squares
|
|
127
128
|
def wss(objects = nil)
|
|
128
129
|
if objects.nil?
|
|
129
|
-
objects = @objects.map { |x|
|
|
130
|
+
objects = @objects.map { |x| Array.new(x[1], x[0]) }.flatten
|
|
130
131
|
end
|
|
131
132
|
|
|
132
133
|
cluster_mean = mean
|
|
@@ -181,7 +182,8 @@ module GeneValidator
|
|
|
181
182
|
d
|
|
182
183
|
end
|
|
183
184
|
|
|
184
|
-
# Returns the euclidian distance between the current cluster and the one
|
|
185
|
+
# Returns the euclidian distance between the current cluster and the one
|
|
186
|
+
# given as parameter
|
|
185
187
|
# Params:
|
|
186
188
|
# +cluster+: Cluster object
|
|
187
189
|
# +method+: 0 or 1
|
|
@@ -212,7 +214,7 @@ module GeneValidator
|
|
|
212
214
|
# Returns within cluster sum of squares
|
|
213
215
|
def wss(lengths = nil)
|
|
214
216
|
if lengths.nil?
|
|
215
|
-
lengths = @lengths.map { |x|
|
|
217
|
+
lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
|
|
216
218
|
end
|
|
217
219
|
|
|
218
220
|
cluster_mean = mean
|
|
@@ -226,12 +228,13 @@ module GeneValidator
|
|
|
226
228
|
##
|
|
227
229
|
# Returns the standard deviation of a set of values
|
|
228
230
|
# Params:
|
|
229
|
-
# +lengths+: a vector of values (optional, by default it takes the values
|
|
231
|
+
# +lengths+: a vector of values (optional, by default it takes the values
|
|
232
|
+
# in the cluster)
|
|
230
233
|
# Output:
|
|
231
234
|
# Real number
|
|
232
235
|
def standard_deviation(lengths = nil)
|
|
233
236
|
if lengths.nil?
|
|
234
|
-
lengths = @lengths.map { |x|
|
|
237
|
+
lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
|
|
235
238
|
end
|
|
236
239
|
|
|
237
240
|
cluster_mean = mean
|
|
@@ -250,8 +253,8 @@ module GeneValidator
|
|
|
250
253
|
# Output:
|
|
251
254
|
# Real number
|
|
252
255
|
def deviation(clusters, queryLength)
|
|
253
|
-
hits = clusters.map { |c| c.lengths.map { |x|
|
|
254
|
-
raw_hits = clusters.map { |c| c.lengths.map { |x|
|
|
256
|
+
hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten
|
|
257
|
+
raw_hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten.to_s.gsub('[', '').gsub(']', '')
|
|
255
258
|
R.eval("sd = sd(c(#{raw_hits}))")
|
|
256
259
|
sd = R.pull('sd')
|
|
257
260
|
sd = standard_deviation(hits)
|
|
@@ -313,7 +316,8 @@ module GeneValidator
|
|
|
313
316
|
@clusters = []
|
|
314
317
|
end
|
|
315
318
|
|
|
316
|
-
def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0,
|
|
319
|
+
def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0,
|
|
320
|
+
vec = @values, debug = false)
|
|
317
321
|
clusters = []
|
|
318
322
|
|
|
319
323
|
if vec.length == 1
|
|
@@ -332,11 +336,9 @@ module GeneValidator
|
|
|
332
336
|
|
|
333
337
|
# clusters = array of clusters
|
|
334
338
|
# initially each length belongs to a different cluster
|
|
335
|
-
histogram.each do |
|
|
336
|
-
if debug
|
|
337
|
-
|
|
338
|
-
end
|
|
339
|
-
hash = { elem[0] => elem[1] }
|
|
339
|
+
histogram.each do |e|
|
|
340
|
+
$stderr.puts "pair (#{e[0].x} #{e[0].y}) appears #{e[1]} times" if debug
|
|
341
|
+
hash = { e[0] => e[1] }
|
|
340
342
|
cluster = PairCluster.new(hash)
|
|
341
343
|
clusters.push(cluster)
|
|
342
344
|
end
|
|
@@ -352,7 +354,7 @@ module GeneValidator
|
|
|
352
354
|
# stop condition 1
|
|
353
355
|
break if no_clusters != 0 && clusters.length == no_clusters
|
|
354
356
|
|
|
355
|
-
iteration
|
|
357
|
+
iteration += iteration
|
|
356
358
|
$stderr.puts "\nIteration #{iteration}" if debug
|
|
357
359
|
|
|
358
360
|
min_distance = 100_000_000
|
|
@@ -363,17 +365,19 @@ module GeneValidator
|
|
|
363
365
|
[*(0..(clusters.length - 2))].each do |i|
|
|
364
366
|
[*((i + 1)..(clusters.length - 1))].each do |j|
|
|
365
367
|
dist = clusters[i].distance(clusters[j], distance_method)
|
|
366
|
-
|
|
368
|
+
if debug
|
|
369
|
+
$stderr.puts "distance between clusters #{i} and #{j} is #{dist}"
|
|
370
|
+
end
|
|
367
371
|
current_density = clusters[i].density + clusters[j].density
|
|
368
372
|
if dist < min_distance
|
|
369
373
|
min_distance = dist
|
|
370
|
-
cluster1
|
|
371
|
-
cluster2
|
|
372
|
-
density
|
|
374
|
+
cluster1 = i
|
|
375
|
+
cluster2 = j
|
|
376
|
+
density = current_density
|
|
373
377
|
elsif dist == min_distance && density < current_density
|
|
374
378
|
cluster1 = i
|
|
375
379
|
cluster2 = j
|
|
376
|
-
density
|
|
380
|
+
density = current_density
|
|
377
381
|
end
|
|
378
382
|
end
|
|
379
383
|
end
|
|
@@ -402,8 +406,8 @@ module GeneValidator
|
|
|
402
406
|
end
|
|
403
407
|
|
|
404
408
|
##
|
|
405
|
-
# Makes an hierarchical clusterization until the most dense cluster is
|
|
406
|
-
# or the distance between clusters is sufficintly big
|
|
409
|
+
# Makes an hierarchical clusterization until the most dense cluster is
|
|
410
|
+
# obtained or the distance between clusters is sufficintly big
|
|
407
411
|
# or the desired number of clusters is obtained
|
|
408
412
|
# Params:
|
|
409
413
|
# +no_clusters+: stop test (number of clusters)
|
|
@@ -412,12 +416,13 @@ module GeneValidator
|
|
|
412
416
|
# +debug+: display debug information
|
|
413
417
|
# Output:
|
|
414
418
|
# vector of +Cluster+ objects
|
|
415
|
-
def hierarchical_clusterization(no_clusters = 0, distance_method = 0,
|
|
419
|
+
def hierarchical_clusterization(no_clusters = 0, distance_method = 0,
|
|
420
|
+
vec = @values, debug = false)
|
|
416
421
|
clusters = []
|
|
417
422
|
vec = vec.sort
|
|
418
423
|
|
|
419
424
|
if vec.length == 1
|
|
420
|
-
hash
|
|
425
|
+
hash = { vec[0] => 1 }
|
|
421
426
|
cluster = Cluster.new(hash)
|
|
422
427
|
clusters.push(cluster)
|
|
423
428
|
clusters
|
|
@@ -425,7 +430,7 @@ module GeneValidator
|
|
|
425
430
|
|
|
426
431
|
# Thresholds
|
|
427
432
|
threshold_distance = (0.25 * (vec.max - vec.min))
|
|
428
|
-
threshold_density
|
|
433
|
+
threshold_density = (0.5 * vec.length).to_i
|
|
429
434
|
|
|
430
435
|
# make a histogram from the input vector
|
|
431
436
|
histogram = Hash[vec.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
|
|
@@ -447,11 +452,10 @@ module GeneValidator
|
|
|
447
452
|
# the loop stops according to the stop conditions
|
|
448
453
|
iteration = 0
|
|
449
454
|
loop do
|
|
450
|
-
|
|
451
455
|
# stop condition 1
|
|
452
456
|
break if no_clusters != 0 && clusters.length == no_clusters
|
|
453
457
|
|
|
454
|
-
iteration
|
|
458
|
+
iteration += iteration
|
|
455
459
|
$stderr.puts "\nIteration #{iteration}" if debug
|
|
456
460
|
|
|
457
461
|
min_distance = 100_000_000
|
|
@@ -460,7 +464,9 @@ module GeneValidator
|
|
|
460
464
|
|
|
461
465
|
clusters[0..clusters.length - 2].each_with_index do |_item, i|
|
|
462
466
|
dist = clusters[i].distance(clusters[i + 1], distance_method)
|
|
463
|
-
|
|
467
|
+
if debug
|
|
468
|
+
$stderr.puts "distance btwn clusters #{i} and #{i + 1} is #{dist}"
|
|
469
|
+
end
|
|
464
470
|
current_density = clusters[i].density + clusters[i + 1].density
|
|
465
471
|
if dist < min_distance
|
|
466
472
|
min_distance = dist
|
|
@@ -509,9 +515,7 @@ module GeneValidator
|
|
|
509
515
|
max_density = 0
|
|
510
516
|
max_density_cluster = 0
|
|
511
517
|
|
|
512
|
-
if clusters.nil?
|
|
513
|
-
nil
|
|
514
|
-
end
|
|
518
|
+
nil if clusters.nil?
|
|
515
519
|
|
|
516
520
|
clusters.each_with_index do |item, i|
|
|
517
521
|
if item.density > max_density
|