genevalidator 1.6.2 → 1.6.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +147 -76
  3. data/Rakefile +1 -1
  4. data/aux/files/css/genevalidator.compiled.min.css +16 -0
  5. data/aux/files/css/{bootstrap.min.css → src/bootstrap.min.css} +0 -0
  6. data/aux/files/css/{font-awesome.min.css → src/font-awesome.min.css} +0 -0
  7. data/aux/files/css/{style.css → src/style.css} +0 -0
  8. data/aux/files/js/genevalidator.compiled.min.js +28 -0
  9. data/aux/files/js/{bootstrap.min.js → src/bootstrap.min.js} +0 -0
  10. data/aux/files/js/{d3.v3.min.js → src/d3.v3.min.js} +0 -0
  11. data/aux/files/js/{jquery-2.1.1.min.js → src/jquery-2.1.1.min.js} +0 -0
  12. data/aux/files/js/{jquery.tablesorter.min.js → src/jquery.tablesorter.min.js} +0 -0
  13. data/aux/files/js/src/plots.js +814 -0
  14. data/aux/files/js/src/script.js +43 -0
  15. data/aux/json_header.erb +6 -6
  16. data/aux/json_query.erb +2 -1
  17. data/aux/template_footer.erb +0 -11
  18. data/aux/template_header.erb +4 -4
  19. data/aux/template_query.erb +1 -1
  20. data/bin/genevalidator +8 -6
  21. data/genevalidator.gemspec +1 -1
  22. data/lib/genevalidator.rb +7 -5
  23. data/lib/genevalidator/arg_validation.rb +12 -9
  24. data/lib/genevalidator/blast.rb +18 -11
  25. data/lib/genevalidator/clusterization.rb +35 -31
  26. data/lib/genevalidator/exceptions.rb +0 -1
  27. data/lib/genevalidator/get_raw_sequences.rb +115 -69
  28. data/lib/genevalidator/hsp.rb +8 -8
  29. data/lib/genevalidator/json_to_gv_results.rb +4 -4
  30. data/lib/genevalidator/output.rb +40 -41
  31. data/lib/genevalidator/pool.rb +5 -4
  32. data/lib/genevalidator/query.rb +37 -0
  33. data/lib/genevalidator/tabular_parser.rb +3 -4
  34. data/lib/genevalidator/validation.rb +16 -11
  35. data/lib/genevalidator/validation_alignment.rb +17 -23
  36. data/lib/genevalidator/validation_blast_reading_frame.rb +3 -3
  37. data/lib/genevalidator/validation_duplication.rb +8 -18
  38. data/lib/genevalidator/validation_gene_merge.rb +11 -9
  39. data/lib/genevalidator/validation_length_cluster.rb +8 -11
  40. data/lib/genevalidator/validation_length_rank.rb +5 -4
  41. data/lib/genevalidator/validation_open_reading_frame.rb +5 -5
  42. data/lib/genevalidator/version.rb +1 -1
  43. data/test/test_all_validations.rb +2 -1
  44. data/test/test_blast.rb +4 -3
  45. data/test/test_extended_array_methods.rb +2 -1
  46. data/test/{test_sequences.rb → test_query.rb} +5 -23
  47. data/test/test_validation_open_reading_frame.rb +7 -7
  48. data/test/test_validations.rb +8 -6
  49. metadata +16 -16
  50. data/aux/app_template_footer.erb +0 -1
  51. data/aux/app_template_header.erb +0 -12
  52. data/aux/files/js/plots.js +0 -828
  53. data/aux/files/js/script.js +0 -71
  54. data/lib/genevalidator/sequences.rb +0 -101
data/aux/files/js/src/script.js ADDED
@@ -0,0 +1,43 @@
1
+ $(document).ready(function() {
2
+ if (window.chrome && (window.location.protocol === 'file:')){
3
+ $('#mainbrowseralertText').text("Some features are not supported in this browser and have been disabled.");
4
+ $('#browseralert').modal();
5
+ }
6
+ GV.toggleOverviewBtn(); // write overview to overview section
7
+ initTableSorter();
8
+ $("[data-toggle='tooltip']").tooltip(); //ToolTip
9
+ });
10
+
11
+ $(document).on( "click", "td, .plot_btn", function( event ) {
12
+ if ($(this).hasClass('success') || $(this).hasClass('danger')){
13
+ var title = $(this).attr('title');
14
+ var val = title.replace(/[ \/]/g, '');
15
+ GV.addData(this, val);
16
+ } else if ($(this).hasClass('plot_btn')){
17
+ GV.addData(this, 'all');
18
+ }
19
+ });
20
+
21
+ function initTableSorter() {
22
+ $.tablesorter.addParser({
23
+ id: 'star_scores', // called later when init the tablesorter
24
+ is: function() {
25
+ return false; // return false so this parser is not auto detected
26
+ },
27
+ format: function(s, table, cell, cellIndex) {
28
+ var $cell = $(cell);
29
+ if (cellIndex === 1) {
30
+ return $cell.attr('data-score') || s;
31
+ }
32
+ return s;
33
+ },
34
+ parsed: false,
35
+ type: 'numeric' // Setting type of data...
36
+ });
37
+ $('table').tablesorter({
38
+ headers: {
39
+ 1 : { sorter: 'star_scores' } // Telling it to use custom parser...
40
+ },
41
+ sortList: [[0,0]],
42
+ });
43
+ }
data/aux/json_header.erb CHANGED
@@ -1,15 +1,15 @@
1
- <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/jquery-2.1.1.min.js"></script><script type="text/javascript" src="files/js/bootstrap.min.js"></script><script type="text/javascript" src="files/js/jquery.tablesorter.min.js"></script><script type="text/javascript" src="files/js/d3.v3.min.js"></script><script type="text/javascript" src="files/js/script.js"></script><script type="text/javascript" src="files/js/plots.js"></script><link href="files/css/bootstrap.min.css" rel="stylesheet" type="text/css"><link href="files/css/font-awesome.min.css" rel="stylesheet" type="text/css"><link href="files/css/style.css" rel="stylesheet" type="text/css"></head>
1
+ <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
2
2
  <body>
3
3
  <div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
4
4
  <div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
5
- <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">The plugin that we use to display the validation plots means that your browser is currently unsupported.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
- <div class="container"><span class="menu_icon pull-right"><a href="#" id="show_all_plots" onclick="show_all_plots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br>Show All Charts</a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/GeneValidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
- <div id="report"><div id="report_1"></div></div><br><br>
5
+ <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
+ <div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
+ <div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
8
8
  <table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
9
9
  <tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition&nbsp;<span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits&nbsp;<span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
10
10
  <% @json_array[0]['validations'].each do |_short_header, item| %>
11
- <th class="sorter-false"><b><%= item['header'] %></b>
12
- <% if item['header'] == "LengthCluster" || item['header'] == "GeneMerge" || item['header'] == "MainORF" || item['header'] == "MissingExtraSequences" %>
11
+ <th class="sorter-false"><b><%= item['header'] %>&nbsp;</b>
12
+ <% if item['header'] == "Length Cluster" || item['header'] == "Gene Merge" || item['header'] == "Main ORF" || item['header'] == "Missing/Extra Sequences" %>
13
13
  <span data-toggle="tooltip" title="Charts available for this validation" data-placement="top"><i class="fa fa-bar-chart-o chartIcon"></i></span>&nbsp;<span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
14
14
  <% else %>
15
15
  <span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
data/aux/json_query.erb CHANGED
@@ -4,7 +4,8 @@
4
4
  <td title="Definition"><%= @row['definition'] %></td>
5
5
  <td title="No. Hits"><%= @row['no_hits'] %></td>
6
6
  <% @row['validations'].each do |_short_header, item| %>
7
- <td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print']%></td>
7
+ <td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print'].gsub(' ', '&nbsp;').gsub(';&nbsp;', '; ')
8
+ %></td>
8
9
  <% end %>
9
10
  <% if @row['validations'].select{|_short_header, item| item['graphs'] != nil}.map{|_short_header, item| item['graphs'].length}.inject(0){|r, e| r+e } != 0 %>
10
11
  <td><button title="Show plots" class="plot_btn btn btn-default"><i class="fa fa-bar-chart-o"></i></button></td>
data/aux/template_footer.erb CHANGED
@@ -1,15 +1,4 @@
1
1
  </tbody></table>
2
- <script>
3
- var evaluation_div = document.getElementById('report_1');
4
- var content_less = "<br><%= less %><div class=\"clearfix\"></div><br><button type=\"button\" onClick='add_content()' class=\"btn btn-primary btn-sm\"> Show More</button>";
5
- var content = "<%= evaluation %> <div class=\"clearfix\"></div><br><button type=\"button\" onClick='evaluation_div.innerHTML=content_less' class=\"btn btn-primary btn-sm\"> Show Less</button>";
6
- evaluation_div.innerHTML = content_less;
7
- function add_content(){
8
- evaluation_div = document.getElementById('report_1');
9
- evaluation_div.innerHTML=content;
10
- addOverallPlot('files/json/overview.json');
11
- }
12
- </script>
13
2
  </div>
14
3
  <% if output_files.length > 1 %>
15
4
  <nav><ul class="pagination">
data/aux/template_header.erb CHANGED
@@ -1,10 +1,10 @@
1
- <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/jquery-2.1.1.min.js"></script><script type="text/javascript" src="files/js/bootstrap.min.js"></script><script type="text/javascript" src="files/js/jquery.tablesorter.min.js"></script><script type="text/javascript" src="files/js/d3.v3.min.js"></script><script type="text/javascript" src="files/js/script.js"></script><script type="text/javascript" src="files/js/plots.js"></script><link href="files/css/bootstrap.min.css" rel="stylesheet" type="text/css"><link href="files/css/font-awesome.min.css" rel="stylesheet" type="text/css"><link href="files/css/style.css" rel="stylesheet" type="text/css"></head>
1
+ <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
2
2
  <body>
3
3
  <div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
4
4
  <div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
5
- <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">The plugin that we use to display the validation plots means that your browser is currently unsupported.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
- <div class="container"><span class="menu_icon pull-right"><a href="#" id="show_all_plots" onclick="show_all_plots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br>Show All Charts</a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/GeneValidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
- <div id="report"><div id="report_1"></div></div><br><br>
5
+ <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
+ <div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
+ <div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
8
8
  <table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
9
9
  <tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition&nbsp;<span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits&nbsp;<span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
10
10
  <% @validations.each do |item| %>
data/aux/template_query.erb CHANGED
@@ -1,6 +1,6 @@
1
1
  <tr data-target="toggle<%= @idx %>" data-jsonFile="files/json/<%= @config[:filename] %>_<%= @idx %>.json">
2
2
  <td title="idx"><%= @idx %></td>
3
- <td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score * 0.85 %>%;"></div></div></td>
3
+ <td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score %>%;"></div></div></td>
4
4
  <td title="Definition"><%= @prediction_def %></td>
5
5
  <td title="No. Hits"><%= @nr_hits %></td>
6
6
  <% @validations.each do |item| %>
data/bin/genevalidator CHANGED
@@ -11,7 +11,7 @@ opt_parser = OptionParser.new do |opts|
11
11
  opts.banner = <<BANNER
12
12
 
13
13
  USAGE:
14
- $ genevalidator [OPTIONS] Input_File
14
+ genevalidator [OPTIONS] Input_File
15
15
 
16
16
  ARGUMENTS:
17
17
  Input_File: Path to the input fasta file containing the predicted sequences.
@@ -38,7 +38,7 @@ BANNER
38
38
  opts.on('-d', '--db [BLAST_DATABASE]',
39
39
  'Path to the BLAST database',
40
40
  'GeneValidator also supports remote databases:',
41
- 'e.g. $ genevalidator -d "swissprot -remote" Input_File') do |db|
41
+ 'e.g. genevalidator -d "swissprot -remote" Input_File') do |db|
42
42
  opt[:db] = db
43
43
  end
44
44
 
@@ -50,8 +50,8 @@ BANNER
50
50
  end
51
51
 
52
52
  opts.on('-j', '--json_file [JSON_FILE]',
53
- 'Generate GV results from a json file (or a subset of a json file)',
54
- 'produced from GeneValidator') do |json|
53
+ 'Generate HTML report from a JSON file (or a subset of a JSON file)',
54
+ 'produced by GeneValidator') do |json|
55
55
  opt[:json_file] = json
56
56
  end
57
57
 
@@ -80,7 +80,7 @@ BANNER
80
80
  'BLAST and Mafft within GeneValidator.') do |num_threads|
81
81
  opt[:num_threads] = num_threads
82
82
  end
83
-
83
+
84
84
  opts.on('-r', '--raw_sequences [raw_seq]',
85
85
  'Supply a fasta file of the raw sequences of all BLAST hits present',
86
86
  'in the supplied BLAST XML or BLAST tabular file.') do |raw_seq|
@@ -90,7 +90,7 @@ BANNER
90
90
  opts.on('-b', '--binaries [binaries]', Array,
91
91
  'Path to BLAST and MAFFT bin folders (is added to $PATH variable)',
92
92
  'To be provided as follows:',
93
- ' $ genevalidator -b /blast/bin/path/ -b /mafft/bin/path/') do |bin|
93
+ 'e.g. genevalidator -b /blast/bin/path/ -b /mafft/bin/path/') do |bin|
94
94
  (opt[:bin] ||= []).concat(bin)
95
95
  end
96
96
 
@@ -127,9 +127,11 @@ start = Time.now
127
127
  if opt[:extract_raw_seqs] && opt[:raw_sequences].nil?
128
128
  GeneValidator.opt = opt
129
129
  GeneValidator.config = {}
130
+ # GVArgValidation.run_raw_seqs
130
131
  GeneValidator::RawSequences.run
131
132
  elsif opt[:json_file]
132
133
  GeneValidator.opt = opt
134
+ # GVArgValidation.run_json_to_gv_results
133
135
  GeneValidator::JsonToGVResults.run
134
136
  else
135
137
  GeneValidator.init(opt)
data/genevalidator.gemspec CHANGED
@@ -1,4 +1,4 @@
1
- # coding: utf-8
1
+ # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'genevalidator/version'
data/lib/genevalidator.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  require 'fileutils'
2
-
3
2
  require 'bio-blastxmlparser'
4
3
 
5
4
  require 'genevalidator/arg_validation'
@@ -68,16 +67,19 @@ module GeneValidator
68
67
  # Parse the blast output and run validations
69
68
  def run
70
69
  # Run BLAST on all sequences (generates @opt[:blast_xml_file])
71
- # if no BLAST OUTPUT file provided...
70
+ # if no BLAST OUTPUT file provided...
72
71
  unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
73
72
  BlastUtils.run_blast_on_input_file
74
73
  end
75
- # Obtain fasta file of all BLAST hits
76
- RawSequences.run unless @opt[:raw_sequences]
74
+ # Obtain fasta file of all BLAST hits if running align or dup validations
75
+ if @opt[:validations].include?('align') ||
76
+ @opt[:validations].include?('dup')
77
+ RawSequences.run unless @opt[:raw_sequences]
78
+ end
77
79
  # Run Validations
78
80
  iterator = parse_blast_output_file
79
81
  (Validations.new).run_validations(iterator)
80
-
82
+
81
83
  Output.write_json_file(@config[:json_output], @config[:json_file])
82
84
  Output.print_footer(@overview, @config)
83
85
  end
data/lib/genevalidator/arg_validation.rb CHANGED
@@ -26,7 +26,6 @@ module GeneValidator
26
26
  check_num_threads
27
27
 
28
28
  export_bin_dirs unless @opt[:bin].nil?
29
-
30
29
  Blast.validate(opt) unless @opt[:test]
31
30
  assert_mafft_installation
32
31
  end
@@ -53,7 +52,8 @@ module GeneValidator
53
52
  $stderr.puts 'Number of threads can not be lower than 0'
54
53
  end
55
54
  return unless @opt[:num_threads] > 256
56
- $stderr.puts "Number of threads set at #{@opt[:num_threads]} is unusually high."
55
+ $stderr.puts "Number of threads set at #{@opt[:num_threads]} is" \
56
+ ' unusually high.'
57
57
  end
58
58
 
59
59
  def assert_BLAST_output_files
@@ -69,8 +69,8 @@ module GeneValidator
69
69
  def assert_output_dir_does_not_exist
70
70
  output_dir = "#{@opt[:input_fasta_file]}.html"
71
71
  return unless File.exist?(output_dir)
72
- $stderr.puts "The output directory already exists for this fasta file.\n"
73
- $stderr.puts "Please remove the following directory: #{output_dir}\n"
72
+ $stderr.puts 'The output directory already exists for this fasta file.'
73
+ $stderr.puts "\nPlease remove the following directory: #{output_dir}\n"
74
74
  $stderr.puts "You can run the following command to remove the folder.\n"
75
75
  $stderr.puts "\n $ rm -r #{output_dir} \n"
76
76
  exit 1
@@ -102,14 +102,16 @@ module GeneValidator
102
102
  fasta_content = IO.binread(@opt[:input_fasta_file])
103
103
  type = BlastUtils.type_of_sequences(fasta_content)
104
104
  return if type == :nucleotide || type == :protein
105
- $stderr.puts '*** Error: The input files does not contain just protein or'
106
- $stderr.puts ' nucleotide data. Please correct this and try again.'
105
+ $stderr.puts '*** Error: The input files does not contain just protein'
106
+ $stderr.puts ' or nucleotide data.'
107
+ $stderr.puts ' Please correct this and try again.'
107
108
  exit 1
108
109
  end
109
110
 
110
111
  def export_bin_dirs
111
112
  @opt[:bin].each do |bin|
112
- if File.directory?(bin)
113
+ bin = File.expand_path(bin)
114
+ if File.exist?(bin) && File.directory?(bin)
113
115
  add_to_path(bin)
114
116
  else
115
117
  $stderr.puts '*** The following bin directory does not exist:'
@@ -120,6 +122,7 @@ module GeneValidator
120
122
 
121
123
  ## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
122
124
  def add_to_path(bin_dir)
125
+ return unless bin_dir
123
126
  return if ENV['PATH'].split(':').include?(bin_dir)
124
127
  ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
125
128
  end
@@ -157,12 +160,12 @@ module GeneValidator
157
160
 
158
161
  def warn_if_remote_database(db)
159
162
  return if db !~ /remote/
160
- $stderr.puts # a blank line
163
+ $stderr.puts # a blank line
161
164
  $stderr.puts 'Warning: BLAST will be carried out on remote servers.'
162
165
  $stderr.puts 'This may take quite a bit of time.'
163
166
  $stderr.puts 'You may want to install a local BLAST database for' \
164
167
  ' faster analyses.'
165
- $stderr.puts # a blank line
168
+ $stderr.puts # a blank line
166
169
  end
167
170
 
168
171
  def assert_local_blast_database_exists(db)
data/lib/genevalidator/blast.rb CHANGED
@@ -4,8 +4,8 @@ require 'forwardable'
4
4
 
5
5
  require 'genevalidator/exceptions'
6
6
  require 'genevalidator/hsp'
7
- require 'genevalidator/sequences'
8
7
  require 'genevalidator/output'
8
+ require 'genevalidator/query'
9
9
 
10
10
  module GeneValidator
11
11
  # Contains methods that run BLAST and methods that analyse sequences
@@ -36,7 +36,7 @@ module GeneValidator
36
36
  " #{threads}"
37
37
 
38
38
  cmd = "echo \"#{query}\" | #{blastcmd}"
39
- `#{cmd} 2>/dev/null`
39
+ `#{cmd} >/dev/null 2>&1`
40
40
  end
41
41
 
42
42
  ##
@@ -54,7 +54,7 @@ module GeneValidator
54
54
  num_threads = opt[:num_threads])
55
55
  return if opt[:blast_xml_file] || opt[:blast_tabular_file]
56
56
 
57
- $stderr.puts 'Running BLAST'
57
+ $stderr.puts 'Running BLAST. This may take a while.'
58
58
  opt[:blast_xml_file] = input_file + '.blast_xml'
59
59
 
60
60
  blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
@@ -65,11 +65,16 @@ module GeneValidator
65
65
  " -out '#{opt[:blast_xml_file]}' -db #{db} " \
66
66
  " -evalue #{EVALUE} -outfmt 5 #{threads}"
67
67
 
68
- `#{blastcmd}`
68
+ `#{blastcmd} >/dev/null 2>&1`
69
69
  return unless File.zero?(opt[:blast_xml_file])
70
- $stderr.puts 'Blast failed to run on the input file. Please ensure that the'
71
- $stderr.puts 'BLAST database exists and try again'
72
- exit 1
70
+ $stderr.puts 'Blast failed to run on the input file.'
71
+ if opt[:db] !~ /remote/
72
+ $stderr.puts 'Please ensure that the BLAST database exists and try'
73
+ $stderr.puts 'again.'
74
+ else
75
+ $stderr.puts 'You are using BLAST with a remote database. Please'
76
+ $stderr.puts 'ensure that you have internet access and try again.'
77
+ end
73
78
  end
74
79
 
75
80
  ##
@@ -86,7 +91,7 @@ module GeneValidator
86
91
  # parse the BLAST XML output and get the hits
87
92
  # hits obtained are proteins! (we use only blastp and blastx)
88
93
  iter.each do |hit|
89
- seq = Sequence.new
94
+ seq = Query.new
90
95
 
91
96
  seq.length_protein = hit.len.to_i
92
97
  seq.type = :protein
@@ -126,7 +131,8 @@ module GeneValidator
126
131
  end
127
132
  current_hsp.align_len = hsp.align_len.to_i
128
133
  current_hsp.identity = hsp.identity.to_i
129
- current_hsp.pidentity = (100 * hsp.identity / (hsp.align_len + 0.0)).round(2)
134
+ current_hsp.pidentity = (100 * hsp.identity / hsp.align_len.to_f)
135
+ .round(2)
130
136
 
131
137
  hsps.push(current_hsp)
132
138
  end
@@ -158,10 +164,11 @@ module GeneValidator
158
164
  # the first sequence does not need to have a fasta definition line
159
165
  sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
160
166
  # get all sequence types
161
- sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }.uniq.compact
167
+ sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
168
+ .uniq.compact
162
169
 
163
170
  return nil if sequence_types.empty?
164
- return sequence_types.first if sequence_types.length == 1
171
+ sequence_types.first if sequence_types.length == 1
165
172
  end
166
173
 
167
174
  ##
data/lib/genevalidator/clusterization.rb CHANGED
@@ -96,7 +96,8 @@ module GeneValidator
96
96
  d
97
97
  end
98
98
 
99
- # Returns the Euclidean distance between the current cluster and the one given as parameter
99
+ # Returns the Euclidean distance between the current cluster and the one
100
+ # given as parameter
100
101
  # Params:
101
102
  # +cluster+: Cluster object
102
103
  # +method+: 0 or 1
@@ -126,7 +127,7 @@ module GeneValidator
126
127
  # Returns within cluster sum of squares
127
128
  def wss(objects = nil)
128
129
  if objects.nil?
129
- objects = @objects.map { |x| a = Array.new(x[1], x[0]) }.flatten
130
+ objects = @objects.map { |x| Array.new(x[1], x[0]) }.flatten
130
131
  end
131
132
 
132
133
  cluster_mean = mean
@@ -181,7 +182,8 @@ module GeneValidator
181
182
  d
182
183
  end
183
184
 
184
- # Returns the Euclidean distance between the current cluster and the one given as parameter
185
+ # Returns the Euclidean distance between the current cluster and the one
186
+ # given as parameter
185
187
  # Params:
186
188
  # +cluster+: Cluster object
187
189
  # +method+: 0 or 1
@@ -212,7 +214,7 @@ module GeneValidator
212
214
  # Returns within cluster sum of squares
213
215
  def wss(lengths = nil)
214
216
  if lengths.nil?
215
- lengths = @lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten
217
+ lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
216
218
  end
217
219
 
218
220
  cluster_mean = mean
@@ -226,12 +228,13 @@ module GeneValidator
226
228
  ##
227
229
  # Returns the standard deviation of a set of values
228
230
  # Params:
229
- # +lengths+: a vector of values (optional, by default it takes the values in the cluster)
231
+ # +lengths+: a vector of values (optional, by default it takes the values
232
+ # in the cluster)
230
233
  # Output:
231
234
  # Real number
232
235
  def standard_deviation(lengths = nil)
233
236
  if lengths.nil?
234
- lengths = @lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten
237
+ lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
235
238
  end
236
239
 
237
240
  cluster_mean = mean
@@ -250,8 +253,8 @@ module GeneValidator
250
253
  # Output:
251
254
  # Real number
252
255
  def deviation(clusters, queryLength)
253
- hits = clusters.map { |c| c.lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten }.flatten
254
- raw_hits = clusters.map { |c| c.lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten }.flatten.to_s.gsub('[', '').gsub(']', '')
256
+ hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten
257
+ raw_hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten.to_s.gsub('[', '').gsub(']', '')
255
258
  R.eval("sd = sd(c(#{raw_hits}))")
256
259
  sd = R.pull('sd')
257
260
  sd = standard_deviation(hits)
@@ -313,7 +316,8 @@ module GeneValidator
313
316
  @clusters = []
314
317
  end
315
318
 
316
- def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
319
+ def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0,
320
+ vec = @values, debug = false)
317
321
  clusters = []
318
322
 
319
323
  if vec.length == 1
@@ -332,11 +336,9 @@ module GeneValidator
332
336
 
333
337
  # clusters = array of clusters
334
338
  # initially each length belongs to a different cluster
335
- histogram.each do |elem|
336
- if debug
337
- $stderr.puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
338
- end
339
- hash = { elem[0] => elem[1] }
339
+ histogram.each do |e|
340
+ $stderr.puts "pair (#{e[0].x} #{e[0].y}) appears #{e[1]} times" if debug
341
+ hash = { e[0] => e[1] }
340
342
  cluster = PairCluster.new(hash)
341
343
  clusters.push(cluster)
342
344
  end
@@ -352,7 +354,7 @@ module GeneValidator
352
354
  # stop condition 1
353
355
  break if no_clusters != 0 && clusters.length == no_clusters
354
356
 
355
- iteration = iteration + 1
357
+ iteration += iteration
356
358
  $stderr.puts "\nIteration #{iteration}" if debug
357
359
 
358
360
  min_distance = 100_000_000
@@ -363,17 +365,19 @@ module GeneValidator
363
365
  [*(0..(clusters.length - 2))].each do |i|
364
366
  [*((i + 1)..(clusters.length - 1))].each do |j|
365
367
  dist = clusters[i].distance(clusters[j], distance_method)
366
- $stderr.puts "distance between clusters #{i} and #{j} is #{dist}" if debug
368
+ if debug
369
+ $stderr.puts "distance between clusters #{i} and #{j} is #{dist}"
370
+ end
367
371
  current_density = clusters[i].density + clusters[j].density
368
372
  if dist < min_distance
369
373
  min_distance = dist
370
- cluster1 = i
371
- cluster2 = j
372
- density = current_density
374
+ cluster1 = i
375
+ cluster2 = j
376
+ density = current_density
373
377
  elsif dist == min_distance && density < current_density
374
378
  cluster1 = i
375
379
  cluster2 = j
376
- density = current_density
380
+ density = current_density
377
381
  end
378
382
  end
379
383
  end
@@ -402,8 +406,8 @@ module GeneValidator
402
406
  end
403
407
 
404
408
  ##
405
- # Makes an hierarchical clusterization until the most dense cluster is obtained
406
- # or the distance between clusters is sufficintly big
409
+ # Makes a hierarchical clusterization until the most dense cluster is
410
+ # obtained or the distance between clusters is sufficiently big
407
411
  # or the desired number of clusters is obtained
408
412
  # Params:
409
413
  # +no_clusters+: stop test (number of clusters)
@@ -412,12 +416,13 @@ module GeneValidator
412
416
  # +debug+: display debug information
413
417
  # Output:
414
418
  # vector of +Cluster+ objects
415
- def hierarchical_clusterization(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
419
+ def hierarchical_clusterization(no_clusters = 0, distance_method = 0,
420
+ vec = @values, debug = false)
416
421
  clusters = []
417
422
  vec = vec.sort
418
423
 
419
424
  if vec.length == 1
420
- hash = { vec[0] => 1 }
425
+ hash = { vec[0] => 1 }
421
426
  cluster = Cluster.new(hash)
422
427
  clusters.push(cluster)
423
428
  clusters
@@ -425,7 +430,7 @@ module GeneValidator
425
430
 
426
431
  # Thresholds
427
432
  threshold_distance = (0.25 * (vec.max - vec.min))
428
- threshold_density = (0.5 * vec.length).to_i
433
+ threshold_density = (0.5 * vec.length).to_i
429
434
 
430
435
  # make a histogram from the input vector
431
436
  histogram = Hash[vec.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
@@ -447,11 +452,10 @@ module GeneValidator
447
452
  # the loop stops according to the stop conditions
448
453
  iteration = 0
449
454
  loop do
450
-
451
455
  # stop condition 1
452
456
  break if no_clusters != 0 && clusters.length == no_clusters
453
457
 
454
- iteration = iteration + 1
458
+ iteration += iteration
455
459
  $stderr.puts "\nIteration #{iteration}" if debug
456
460
 
457
461
  min_distance = 100_000_000
@@ -460,7 +464,9 @@ module GeneValidator
460
464
 
461
465
  clusters[0..clusters.length - 2].each_with_index do |_item, i|
462
466
  dist = clusters[i].distance(clusters[i + 1], distance_method)
463
- $stderr.puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
467
+ if debug
468
+ $stderr.puts "distance btwn clusters #{i} and #{i + 1} is #{dist}"
469
+ end
464
470
  current_density = clusters[i].density + clusters[i + 1].density
465
471
  if dist < min_distance
466
472
  min_distance = dist
@@ -509,9 +515,7 @@ module GeneValidator
509
515
  max_density = 0
510
516
  max_density_cluster = 0
511
517
 
512
- if clusters.nil?
513
- nil
514
- end
518
+ nil if clusters.nil?
515
519
 
516
520
  clusters.each_with_index do |item, i|
517
521
  if item.density > max_density