genevalidator 1.6.2 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +147 -76
  3. data/Rakefile +1 -1
  4. data/aux/files/css/genevalidator.compiled.min.css +16 -0
  5. data/aux/files/css/{bootstrap.min.css → src/bootstrap.min.css} +0 -0
  6. data/aux/files/css/{font-awesome.min.css → src/font-awesome.min.css} +0 -0
  7. data/aux/files/css/{style.css → src/style.css} +0 -0
  8. data/aux/files/js/genevalidator.compiled.min.js +28 -0
  9. data/aux/files/js/{bootstrap.min.js → src/bootstrap.min.js} +0 -0
  10. data/aux/files/js/{d3.v3.min.js → src/d3.v3.min.js} +0 -0
  11. data/aux/files/js/{jquery-2.1.1.min.js → src/jquery-2.1.1.min.js} +0 -0
  12. data/aux/files/js/{jquery.tablesorter.min.js → src/jquery.tablesorter.min.js} +0 -0
  13. data/aux/files/js/src/plots.js +814 -0
  14. data/aux/files/js/src/script.js +43 -0
  15. data/aux/json_header.erb +6 -6
  16. data/aux/json_query.erb +2 -1
  17. data/aux/template_footer.erb +0 -11
  18. data/aux/template_header.erb +4 -4
  19. data/aux/template_query.erb +1 -1
  20. data/bin/genevalidator +8 -6
  21. data/genevalidator.gemspec +1 -1
  22. data/lib/genevalidator.rb +7 -5
  23. data/lib/genevalidator/arg_validation.rb +12 -9
  24. data/lib/genevalidator/blast.rb +18 -11
  25. data/lib/genevalidator/clusterization.rb +35 -31
  26. data/lib/genevalidator/exceptions.rb +0 -1
  27. data/lib/genevalidator/get_raw_sequences.rb +115 -69
  28. data/lib/genevalidator/hsp.rb +8 -8
  29. data/lib/genevalidator/json_to_gv_results.rb +4 -4
  30. data/lib/genevalidator/output.rb +40 -41
  31. data/lib/genevalidator/pool.rb +5 -4
  32. data/lib/genevalidator/query.rb +37 -0
  33. data/lib/genevalidator/tabular_parser.rb +3 -4
  34. data/lib/genevalidator/validation.rb +16 -11
  35. data/lib/genevalidator/validation_alignment.rb +17 -23
  36. data/lib/genevalidator/validation_blast_reading_frame.rb +3 -3
  37. data/lib/genevalidator/validation_duplication.rb +8 -18
  38. data/lib/genevalidator/validation_gene_merge.rb +11 -9
  39. data/lib/genevalidator/validation_length_cluster.rb +8 -11
  40. data/lib/genevalidator/validation_length_rank.rb +5 -4
  41. data/lib/genevalidator/validation_open_reading_frame.rb +5 -5
  42. data/lib/genevalidator/version.rb +1 -1
  43. data/test/test_all_validations.rb +2 -1
  44. data/test/test_blast.rb +4 -3
  45. data/test/test_extended_array_methods.rb +2 -1
  46. data/test/{test_sequences.rb → test_query.rb} +5 -23
  47. data/test/test_validation_open_reading_frame.rb +7 -7
  48. data/test/test_validations.rb +8 -6
  49. metadata +16 -16
  50. data/aux/app_template_footer.erb +0 -1
  51. data/aux/app_template_header.erb +0 -12
  52. data/aux/files/js/plots.js +0 -828
  53. data/aux/files/js/script.js +0 -71
  54. data/lib/genevalidator/sequences.rb +0 -101
@@ -0,0 +1,43 @@
1
+ $(document).ready(function() {
2
+ if (window.chrome && (window.location.protocol === 'file:')){
3
+ $('#mainbrowseralertText').text("Some features are not supported in this browser and have been disabled.");
4
+ $('#browseralert').modal();
5
+ }
6
+ GV.toggleOverviewBtn(); // write overview to overview section
7
+ initTableSorter();
8
+ $("[data-toggle='tooltip']").tooltip(); //ToolTip
9
+ });
10
+
11
+ $(document).on( "click", "td, .plot_btn", function( event ) {
12
+ if ($(this).hasClass('success') || $(this).hasClass('danger')){
13
+ var title = $(this).attr('title');
14
+ var val = title.replace(/[ \/]/g, '');
15
+ GV.addData(this, val);
16
+ } else if ($(this).hasClass('plot_btn')){
17
+ GV.addData(this, 'all');
18
+ }
19
+ });
20
+
21
+ function initTableSorter() {
22
+ $.tablesorter.addParser({
23
+ id: 'star_scores', // called later when init the tablesorter
24
+ is: function() {
25
+ return false; // return false so this parser is not auto detected
26
+ },
27
+ format: function(s, table, cell, cellIndex) {
28
+ var $cell = $(cell);
29
+ if (cellIndex === 1) {
30
+ return $cell.attr('data-score') || s;
31
+ }
32
+ return s;
33
+ },
34
+ parsed: false,
35
+ type: 'numeric' // Setting type of data...
36
+ });
37
+ $('table').tablesorter({
38
+ headers: {
39
+ 1 : { sorter: 'star_scores' } // Telling it to use custom parser...
40
+ },
41
+ sortList: [[0,0]],
42
+ });
43
+ }
data/aux/json_header.erb CHANGED
@@ -1,15 +1,15 @@
1
- <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/jquery-2.1.1.min.js"></script><script type="text/javascript" src="files/js/bootstrap.min.js"></script><script type="text/javascript" src="files/js/jquery.tablesorter.min.js"></script><script type="text/javascript" src="files/js/d3.v3.min.js"></script><script type="text/javascript" src="files/js/script.js"></script><script type="text/javascript" src="files/js/plots.js"></script><link href="files/css/bootstrap.min.css" rel="stylesheet" type="text/css"><link href="files/css/font-awesome.min.css" rel="stylesheet" type="text/css"><link href="files/css/style.css" rel="stylesheet" type="text/css"></head>
1
+ <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
2
2
  <body>
3
3
  <div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
4
4
  <div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
5
- <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">The plugin that we use to display the validation plots means that your browser is currently unsupported.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
- <div class="container"><span class="menu_icon pull-right"><a href="#" id="show_all_plots" onclick="show_all_plots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br>Show All Charts</a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/GeneValidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
- <div id="report"><div id="report_1"></div></div><br><br>
5
+ <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
+ <div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
+ <div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
8
8
  <table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
9
9
  <tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition&nbsp;<span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits&nbsp;<span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
10
10
  <% @json_array[0]['validations'].each do |_short_header, item| %>
11
- <th class="sorter-false"><b><%= item['header'] %></b>
12
- <% if item['header'] == "LengthCluster" || item['header'] == "GeneMerge" || item['header'] == "MainORF" || item['header'] == "MissingExtraSequences" %>
11
+ <th class="sorter-false"><b><%= item['header'] %>&nbsp;</b>
12
+ <% if item['header'] == "Length Cluster" || item['header'] == "Gene Merge" || item['header'] == "Main ORF" || item['header'] == "Missing/Extra Sequences" %>
13
13
  <span data-toggle="tooltip" title="Charts available for this validation" data-placement="top"><i class="fa fa-bar-chart-o chartIcon"></i></span>&nbsp;<span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
14
14
  <% else %>
15
15
  <span data-toggle="tooltip" title="<%=item['description']%>" data-placement="top"><i class="fa fa-question-circle"></i></span>
data/aux/json_query.erb CHANGED
@@ -4,7 +4,8 @@
4
4
  <td title="Definition"><%= @row['definition'] %></td>
5
5
  <td title="No. Hits"><%= @row['no_hits'] %></td>
6
6
  <% @row['validations'].each do |_short_header, item| %>
7
- <td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print']%></td>
7
+ <td title="<%=item['header']%>" class="<%=item['status']%>"><%= item['print'].gsub(' ', '&nbsp;').gsub(';&nbsp;', '; ')
8
+ %></td>
8
9
  <% end %>
9
10
  <% if @row['validations'].select{|_short_header, item| item['graphs'] != nil}.map{|_short_header, item| item['graphs'].length}.inject(0){|r, e| r+e } != 0 %>
10
11
  <td><button title="Show plots" class="plot_btn btn btn-default"><i class="fa fa-bar-chart-o"></i></button></td>
@@ -1,15 +1,4 @@
1
1
  </tbody></table>
2
- <script>
3
- var evaluation_div = document.getElementById('report_1');
4
- var content_less = "<br><%= less %><div class=\"clearfix\"></div><br><button type=\"button\" onClick='add_content()' class=\"btn btn-primary btn-sm\"> Show More</button>";
5
- var content = "<%= evaluation %> <div class=\"clearfix\"></div><br><button type=\"button\" onClick='evaluation_div.innerHTML=content_less' class=\"btn btn-primary btn-sm\"> Show Less</button>";
6
- evaluation_div.innerHTML = content_less;
7
- function add_content(){
8
- evaluation_div = document.getElementById('report_1');
9
- evaluation_div.innerHTML=content;
10
- addOverallPlot('files/json/overview.json');
11
- }
12
- </script>
13
2
  </div>
14
3
  <% if output_files.length > 1 %>
15
4
  <nav><ul class="pagination">
@@ -1,10 +1,10 @@
1
- <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/jquery-2.1.1.min.js"></script><script type="text/javascript" src="files/js/bootstrap.min.js"></script><script type="text/javascript" src="files/js/jquery.tablesorter.min.js"></script><script type="text/javascript" src="files/js/d3.v3.min.js"></script><script type="text/javascript" src="files/js/script.js"></script><script type="text/javascript" src="files/js/plots.js"></script><link href="files/css/bootstrap.min.css" rel="stylesheet" type="text/css"><link href="files/css/font-awesome.min.css" rel="stylesheet" type="text/css"><link href="files/css/style.css" rel="stylesheet" type="text/css"></head>
1
+ <!DOCTYPE html><html><head><title>GeneValidator: identify problems with gene predictions</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"><script type="text/javascript" src="files/js/genevalidator.compiled.min.js"></script><link href="files/css/genevalidator.compiled.min.css" rel="stylesheet" type="text/css"></head>
2
2
  <body>
3
3
  <div aria-hidden="true" aria-labelledby="myModalLabel3" class="modal" id="spinner1" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-body text-center"><h2>Loading ...</h2><i class="fa fa-spinner fa-5x fa-spin"></i></div></div></div></div>
4
4
  <div class="modal fade" id="alert" aria-hidden="true" aria-labelledby="myModalLabel5" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Oops, Something went wrong!</h4></div><div class="modal-body"><p>This operation is not posible. There seems to be too many queries...</p></div><div class="modal-footer"><button type="button" class="btn btn-default" data-dismiss="modal">Close</button></div></div></div></div>
5
- <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText">The plugin that we use to display the validation plots means that your browser is currently unsupported.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
- <div class="container"><span class="menu_icon pull-right"><a href="#" id="show_all_plots" onclick="show_all_plots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br>Show All Charts</a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/GeneValidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
- <div id="report"><div id="report_1"></div></div><br><br>
5
+ <div class="modal fade" id="browseralert" aria-hidden="true" aria-labelledby="browser" role="dialog" tabindex="-1"><div class="modal-dialog"><div class="modal-content browser-alert"><div class="modal-header"><button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button><h4 class="modal-title">Incompatible Browser - Please use Mozilla Firefox!</h4></div><div class="modal-body" id="browseralertbody"><p id="browseralertText"><strong id="mainbrowseralertText">Sorry, this feature is not supported in your browser.</strong></p><p>This is because Chrome does not allow access to local files. Thus, to avoid this, simply use a different browser (like Firefox or Safari) or start a local server in the results folder.</p><p>Please use <strong>Mozilla FireFox</strong> to view this file.</p></div></div></div></div>
6
+ <div class="container"><span class="menu_icon pull-right"><a href="#" onclick="GV.toggleAllPlots(this);"><i class="fa fa-2x fa-bar-chart-o"></i><br><span id="show_all_plots">Show All Charts</span></a></span><span class="menu_icon pull-right"><a href="https://github.com/wurmlab/genevalidator" target="_blank"><i class="fa fa-2x fa-github"></i><br>Source Code</a></span><span class="menu_icon pull-right"><a href="http://wurmlab.github.io/tools/genevalidator/"><i class="fa fa-2x fa-info-circle"></i><br>About</a></span><div class="clearfix"></div><img class="logo" src="files/img/gene.png" alt="logo"><div class="page-title"><h1>Gene Validator <small>v<%= GeneValidator::VERSION %></small></h1><h4 class="subheading">Identify Problems with Gene Predictions</h4></div><br/><br/>
7
+ <div id="overview" class="text-left"><div id="overview_text"></div><button id="overview_btn" class="btn btn-primary btn-sm" data-toggle="button" onclick="GV.toggleOverviewBtn();"></button></div><br><br>
8
8
  <table id="sortable_table" class="table table-striped table-collapsed table-bordered table-condensed tablesorter"><thead>
9
9
  <tr id="header"><th>#</th><th>Ranking</th><th>Sequence Definition&nbsp;<span data-toggle="tooltip" title="Query definition as it apears in the input fasta file." data-placement="top"><i class="fa fa-question-circle"></i></span></th><th>No. Hits&nbsp;<span data-toggle="tooltip" title="Number of non-identical hits found by BLAST." data-placement="top"><i class="fa fa-question-circle"></i></span></th>
10
10
  <% @validations.each do |item| %>
@@ -1,6 +1,6 @@
1
1
  <tr data-target="toggle<%= @idx %>" data-jsonFile="files/json/<%= @config[:filename] %>_<%= @idx %>.json">
2
2
  <td title="idx"><%= @idx %></td>
3
- <td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score * 0.85 %>%;"></div></div></td>
3
+ <td data-score="<%= overall_score %>"><div class="ratings"><div class="empty-stars"></div><div class="full-stars" style="width:<%= overall_score %>%;"></div></div></td>
4
4
  <td title="Definition"><%= @prediction_def %></td>
5
5
  <td title="No. Hits"><%= @nr_hits %></td>
6
6
  <% @validations.each do |item| %>
data/bin/genevalidator CHANGED
@@ -11,7 +11,7 @@ opt_parser = OptionParser.new do |opts|
11
11
  opts.banner = <<BANNER
12
12
 
13
13
  USAGE:
14
- $ genevalidator [OPTIONS] Input_File
14
+ genevalidator [OPTIONS] Input_File
15
15
 
16
16
  ARGUMENTS:
17
17
  Input_File: Path to the input fasta file containing the predicted sequences.
@@ -38,7 +38,7 @@ BANNER
38
38
  opts.on('-d', '--db [BLAST_DATABASE]',
39
39
  'Path to the BLAST database',
40
40
  'GeneValidator also supports remote databases:',
41
- 'e.g. $ genevalidator -d "swissprot -remote" Input_File') do |db|
41
+ 'e.g. genevalidator -d "swissprot -remote" Input_File') do |db|
42
42
  opt[:db] = db
43
43
  end
44
44
 
@@ -50,8 +50,8 @@ BANNER
50
50
  end
51
51
 
52
52
  opts.on('-j', '--json_file [JSON_FILE]',
53
- 'Generate GV results from a json file (or a subset of a json file)',
54
- 'produced from GeneValidator') do |json|
53
+ 'Generate HTML report from a JSON file (or a subset of a JSON file)',
54
+ 'produced by GeneValidator') do |json|
55
55
  opt[:json_file] = json
56
56
  end
57
57
 
@@ -80,7 +80,7 @@ BANNER
80
80
  'BLAST and Mafft within GeneValidator.') do |num_threads|
81
81
  opt[:num_threads] = num_threads
82
82
  end
83
-
83
+
84
84
  opts.on('-r', '--raw_sequences [raw_seq]',
85
85
  'Supply a fasta file of the raw sequences of all BLAST hits present',
86
86
  'in the supplied BLAST XML or BLAST tabular file.') do |raw_seq|
@@ -90,7 +90,7 @@ BANNER
90
90
  opts.on('-b', '--binaries [binaries]', Array,
91
91
  'Path to BLAST and MAFFT bin folders (is added to $PATH variable)',
92
92
  'To be provided as follows:',
93
- ' $ genevalidator -b /blast/bin/path/ -b /mafft/bin/path/') do |bin|
93
+ 'e.g. genevalidator -b /blast/bin/path/ -b /mafft/bin/path/') do |bin|
94
94
  (opt[:bin] ||= []).concat(bin)
95
95
  end
96
96
 
@@ -127,9 +127,11 @@ start = Time.now
127
127
  if opt[:extract_raw_seqs] && opt[:raw_sequences].nil?
128
128
  GeneValidator.opt = opt
129
129
  GeneValidator.config = {}
130
+ # GVArgValidation.run_raw_seqs
130
131
  GeneValidator::RawSequences.run
131
132
  elsif opt[:json_file]
132
133
  GeneValidator.opt = opt
134
+ # GVArgValidation.run_json_to_gv_results
133
135
  GeneValidator::JsonToGVResults.run
134
136
  else
135
137
  GeneValidator.init(opt)
@@ -1,4 +1,4 @@
1
- # coding: utf-8
1
+ # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'genevalidator/version'
data/lib/genevalidator.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  require 'fileutils'
2
-
3
2
  require 'bio-blastxmlparser'
4
3
 
5
4
  require 'genevalidator/arg_validation'
@@ -68,16 +67,19 @@ module GeneValidator
68
67
  # Parse the blast output and run validations
69
68
  def run
70
69
  # Run BLAST on all sequences (generates @opt[:blast_xml_file])
71
- # if no BLAST OUTPUT file provided...
70
+ # if no BLAST OUTPUT file provided...
72
71
  unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
73
72
  BlastUtils.run_blast_on_input_file
74
73
  end
75
- # Obtain fasta file of all BLAST hits
76
- RawSequences.run unless @opt[:raw_sequences]
74
+ # Obtain fasta file of all BLAST hits if running align or dup validations
75
+ if @opt[:validations].include?('align') ||
76
+ @opt[:validations].include?('dup')
77
+ RawSequences.run unless @opt[:raw_sequences]
78
+ end
77
79
  # Run Validations
78
80
  iterator = parse_blast_output_file
79
81
  (Validations.new).run_validations(iterator)
80
-
82
+
81
83
  Output.write_json_file(@config[:json_output], @config[:json_file])
82
84
  Output.print_footer(@overview, @config)
83
85
  end
@@ -26,7 +26,6 @@ module GeneValidator
26
26
  check_num_threads
27
27
 
28
28
  export_bin_dirs unless @opt[:bin].nil?
29
-
30
29
  Blast.validate(opt) unless @opt[:test]
31
30
  assert_mafft_installation
32
31
  end
@@ -53,7 +52,8 @@ module GeneValidator
53
52
  $stderr.puts 'Number of threads can not be lower than 0'
54
53
  end
55
54
  return unless @opt[:num_threads] > 256
56
- $stderr.puts "Number of threads set at #{@opt[:num_threads]} is unusually high."
55
+ $stderr.puts "Number of threads set at #{@opt[:num_threads]} is" \
56
+ ' unusually high.'
57
57
  end
58
58
 
59
59
  def assert_BLAST_output_files
@@ -69,8 +69,8 @@ module GeneValidator
69
69
  def assert_output_dir_does_not_exist
70
70
  output_dir = "#{@opt[:input_fasta_file]}.html"
71
71
  return unless File.exist?(output_dir)
72
- $stderr.puts "The output directory already exists for this fasta file.\n"
73
- $stderr.puts "Please remove the following directory: #{output_dir}\n"
72
+ $stderr.puts 'The output directory already exists for this fasta file.'
73
+ $stderr.puts "\nPlease remove the following directory: #{output_dir}\n"
74
74
  $stderr.puts "You can run the following command to remove the folder.\n"
75
75
  $stderr.puts "\n $ rm -r #{output_dir} \n"
76
76
  exit 1
@@ -102,14 +102,16 @@ module GeneValidator
102
102
  fasta_content = IO.binread(@opt[:input_fasta_file])
103
103
  type = BlastUtils.type_of_sequences(fasta_content)
104
104
  return if type == :nucleotide || type == :protein
105
- $stderr.puts '*** Error: The input files does not contain just protein or'
106
- $stderr.puts ' nucleotide data. Please correct this and try again.'
105
+ $stderr.puts '*** Error: The input files does not contain just protein'
106
+ $stderr.puts ' or nucleotide data.'
107
+ $stderr.puts ' Please correct this and try again.'
107
108
  exit 1
108
109
  end
109
110
 
110
111
  def export_bin_dirs
111
112
  @opt[:bin].each do |bin|
112
- if File.directory?(bin)
113
+ bin = File.expand_path(bin)
114
+ if File.exist?(bin) && File.directory?(bin)
113
115
  add_to_path(bin)
114
116
  else
115
117
  $stderr.puts '*** The following bin directory does not exist:'
@@ -120,6 +122,7 @@ module GeneValidator
120
122
 
121
123
  ## Checks if dir is in $PATH and if not, it adds the dir to the $PATH.
122
124
  def add_to_path(bin_dir)
125
+ return unless bin_dir
123
126
  return if ENV['PATH'].split(':').include?(bin_dir)
124
127
  ENV['PATH'] = "#{bin_dir}:#{ENV['PATH']}"
125
128
  end
@@ -157,12 +160,12 @@ module GeneValidator
157
160
 
158
161
  def warn_if_remote_database(db)
159
162
  return if db !~ /remote/
160
- $stderr.puts # a blank line
163
+ $stderr.puts # a blank line
161
164
  $stderr.puts 'Warning: BLAST will be carried out on remote servers.'
162
165
  $stderr.puts 'This may take quite a bit of time.'
163
166
  $stderr.puts 'You may want to install a local BLAST database for' \
164
167
  ' faster analyses.'
165
- $stderr.puts # a blank line
168
+ $stderr.puts # a blank line
166
169
  end
167
170
 
168
171
  def assert_local_blast_database_exists(db)
@@ -4,8 +4,8 @@ require 'forwardable'
4
4
 
5
5
  require 'genevalidator/exceptions'
6
6
  require 'genevalidator/hsp'
7
- require 'genevalidator/sequences'
8
7
  require 'genevalidator/output'
8
+ require 'genevalidator/query'
9
9
 
10
10
  module GeneValidator
11
11
  # Contains methods that run BLAST and methods that analyse sequences
@@ -36,7 +36,7 @@ module GeneValidator
36
36
  " #{threads}"
37
37
 
38
38
  cmd = "echo \"#{query}\" | #{blastcmd}"
39
- `#{cmd} 2>/dev/null`
39
+ `#{cmd} >/dev/null 2>&1`
40
40
  end
41
41
 
42
42
  ##
@@ -54,7 +54,7 @@ module GeneValidator
54
54
  num_threads = opt[:num_threads])
55
55
  return if opt[:blast_xml_file] || opt[:blast_tabular_file]
56
56
 
57
- $stderr.puts 'Running BLAST'
57
+ $stderr.puts 'Running BLAST. This may take a while.'
58
58
  opt[:blast_xml_file] = input_file + '.blast_xml'
59
59
 
60
60
  blast_type = (seq_type == :protein) ? 'blastp' : 'blastx'
@@ -65,11 +65,16 @@ module GeneValidator
65
65
  " -out '#{opt[:blast_xml_file]}' -db #{db} " \
66
66
  " -evalue #{EVALUE} -outfmt 5 #{threads}"
67
67
 
68
- `#{blastcmd}`
68
+ `#{blastcmd} >/dev/null 2>&1`
69
69
  return unless File.zero?(opt[:blast_xml_file])
70
- $stderr.puts 'Blast failed to run on the input file. Please ensure that the'
71
- $stderr.puts 'BLAST database exists and try again'
72
- exit 1
70
+ $stderr.puts 'Blast failed to run on the input file.'
71
+ if opt[:db] !~ /remote/
72
+ $stderr.puts 'Please ensure that the BLAST database exists and try'
73
+ $stderr.puts 'again.'
74
+ else
75
+ $stderr.puts 'You are using BLAST with a remote database. Please'
76
+ $stderr.puts 'ensure that you have internet access and try again.'
77
+ end
73
78
  end
74
79
 
75
80
  ##
@@ -86,7 +91,7 @@ module GeneValidator
86
91
  # parse blast the xml output and get the hits
87
92
  # hits obtained are proteins! (we use only blastp and blastx)
88
93
  iter.each do |hit|
89
- seq = Sequence.new
94
+ seq = Query.new
90
95
 
91
96
  seq.length_protein = hit.len.to_i
92
97
  seq.type = :protein
@@ -126,7 +131,8 @@ module GeneValidator
126
131
  end
127
132
  current_hsp.align_len = hsp.align_len.to_i
128
133
  current_hsp.identity = hsp.identity.to_i
129
- current_hsp.pidentity = (100 * hsp.identity / (hsp.align_len + 0.0)).round(2)
134
+ current_hsp.pidentity = (100 * hsp.identity / hsp.align_len.to_f)
135
+ .round(2)
130
136
 
131
137
  hsps.push(current_hsp)
132
138
  end
@@ -158,10 +164,11 @@ module GeneValidator
158
164
  # the first sequence does not need to have a fasta definition line
159
165
  sequences = fasta_format_string.split(/^>.*$/).delete_if(&:empty?)
160
166
  # get all sequence types
161
- sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }.uniq.compact
167
+ sequence_types = sequences.collect { |seq| guess_sequence_type(seq) }
168
+ .uniq.compact
162
169
 
163
170
  return nil if sequence_types.empty?
164
- return sequence_types.first if sequence_types.length == 1
171
+ sequence_types.first if sequence_types.length == 1
165
172
  end
166
173
 
167
174
  ##
@@ -96,7 +96,8 @@ module GeneValidator
96
96
  d
97
97
  end
98
98
 
99
- # Returns the euclidian distance between the current cluster and the one given as parameter
99
+ # Returns the euclidian distance between the current cluster and the one
100
+ # given as parameter
100
101
  # Params:
101
102
  # +cluster+: Cluster object
102
103
  # +method+: 0 or 1
@@ -126,7 +127,7 @@ module GeneValidator
126
127
  # Returns within cluster sum of squares
127
128
  def wss(objects = nil)
128
129
  if objects.nil?
129
- objects = @objects.map { |x| a = Array.new(x[1], x[0]) }.flatten
130
+ objects = @objects.map { |x| Array.new(x[1], x[0]) }.flatten
130
131
  end
131
132
 
132
133
  cluster_mean = mean
@@ -181,7 +182,8 @@ module GeneValidator
181
182
  d
182
183
  end
183
184
 
184
- # Returns the euclidian distance between the current cluster and the one given as parameter
185
+ # Returns the euclidian distance between the current cluster and the one
186
+ # given as parameter
185
187
  # Params:
186
188
  # +cluster+: Cluster object
187
189
  # +method+: 0 or 1
@@ -212,7 +214,7 @@ module GeneValidator
212
214
  # Returns within cluster sum of squares
213
215
  def wss(lengths = nil)
214
216
  if lengths.nil?
215
- lengths = @lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten
217
+ lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
216
218
  end
217
219
 
218
220
  cluster_mean = mean
@@ -226,12 +228,13 @@ module GeneValidator
226
228
  ##
227
229
  # Returns the standard deviation of a set of values
228
230
  # Params:
229
- # +lengths+: a vector of values (optional, by default it takes the values in the cluster)
231
+ # +lengths+: a vector of values (optional, by default it takes the values
232
+ # in the cluster)
230
233
  # Output:
231
234
  # Real number
232
235
  def standard_deviation(lengths = nil)
233
236
  if lengths.nil?
234
- lengths = @lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten
237
+ lengths = @lengths.map { |x| Array.new(x[1], x[0]) }.flatten
235
238
  end
236
239
 
237
240
  cluster_mean = mean
@@ -250,8 +253,8 @@ module GeneValidator
250
253
  # Output:
251
254
  # Real number
252
255
  def deviation(clusters, queryLength)
253
- hits = clusters.map { |c| c.lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten }.flatten
254
- raw_hits = clusters.map { |c| c.lengths.map { |x| a = Array.new(x[1], x[0]) }.flatten }.flatten.to_s.gsub('[', '').gsub(']', '')
256
+ hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten
257
+ raw_hits = clusters.map { |c| c.lengths.map { |x| Array.new(x[1], x[0]) }.flatten }.flatten.to_s.gsub('[', '').gsub(']', '')
255
258
  R.eval("sd = sd(c(#{raw_hits}))")
256
259
  sd = R.pull('sd')
257
260
  sd = standard_deviation(hits)
@@ -313,7 +316,8 @@ module GeneValidator
313
316
  @clusters = []
314
317
  end
315
318
 
316
- def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
319
+ def hierarchical_clusterization_2d(no_clusters = 0, distance_method = 0,
320
+ vec = @values, debug = false)
317
321
  clusters = []
318
322
 
319
323
  if vec.length == 1
@@ -332,11 +336,9 @@ module GeneValidator
332
336
 
333
337
  # clusters = array of clusters
334
338
  # initially each length belongs to a different cluster
335
- histogram.each do |elem|
336
- if debug
337
- $stderr.puts "pair (#{elem[0].x} #{elem[0].y}) appears #{elem[1]} times"
338
- end
339
- hash = { elem[0] => elem[1] }
339
+ histogram.each do |e|
340
+ $stderr.puts "pair (#{e[0].x} #{e[0].y}) appears #{e[1]} times" if debug
341
+ hash = { e[0] => e[1] }
340
342
  cluster = PairCluster.new(hash)
341
343
  clusters.push(cluster)
342
344
  end
@@ -352,7 +354,7 @@ module GeneValidator
352
354
  # stop condition 1
353
355
  break if no_clusters != 0 && clusters.length == no_clusters
354
356
 
355
- iteration = iteration + 1
357
+ iteration += iteration
356
358
  $stderr.puts "\nIteration #{iteration}" if debug
357
359
 
358
360
  min_distance = 100_000_000
@@ -363,17 +365,19 @@ module GeneValidator
363
365
  [*(0..(clusters.length - 2))].each do |i|
364
366
  [*((i + 1)..(clusters.length - 1))].each do |j|
365
367
  dist = clusters[i].distance(clusters[j], distance_method)
366
- $stderr.puts "distance between clusters #{i} and #{j} is #{dist}" if debug
368
+ if debug
369
+ $stderr.puts "distance between clusters #{i} and #{j} is #{dist}"
370
+ end
367
371
  current_density = clusters[i].density + clusters[j].density
368
372
  if dist < min_distance
369
373
  min_distance = dist
370
- cluster1 = i
371
- cluster2 = j
372
- density = current_density
374
+ cluster1 = i
375
+ cluster2 = j
376
+ density = current_density
373
377
  elsif dist == min_distance && density < current_density
374
378
  cluster1 = i
375
379
  cluster2 = j
376
- density = current_density
380
+ density = current_density
377
381
  end
378
382
  end
379
383
  end
@@ -402,8 +406,8 @@ module GeneValidator
402
406
  end
403
407
 
404
408
  ##
405
- # Makes an hierarchical clusterization until the most dense cluster is obtained
406
- # or the distance between clusters is sufficintly big
409
+ # Makes an hierarchical clusterization until the most dense cluster is
410
+ # obtained or the distance between clusters is sufficiently big
407
411
  # or the desired number of clusters is obtained
408
412
  # Params:
409
413
  # +no_clusters+: stop test (number of clusters)
@@ -412,12 +416,13 @@ module GeneValidator
412
416
  # +debug+: display debug information
413
417
  # Output:
414
418
  # vector of +Cluster+ objects
415
- def hierarchical_clusterization(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
419
+ def hierarchical_clusterization(no_clusters = 0, distance_method = 0,
420
+ vec = @values, debug = false)
416
421
  clusters = []
417
422
  vec = vec.sort
418
423
 
419
424
  if vec.length == 1
420
- hash = { vec[0] => 1 }
425
+ hash = { vec[0] => 1 }
421
426
  cluster = Cluster.new(hash)
422
427
  clusters.push(cluster)
423
428
  clusters
@@ -425,7 +430,7 @@ module GeneValidator
425
430
 
426
431
  # Thresholds
427
432
  threshold_distance = (0.25 * (vec.max - vec.min))
428
- threshold_density = (0.5 * vec.length).to_i
433
+ threshold_density = (0.5 * vec.length).to_i
429
434
 
430
435
  # make a histogram from the input vector
431
436
  histogram = Hash[vec.group_by { |x| x }.map { |k, vs| [k, vs.length] }]
@@ -447,11 +452,10 @@ module GeneValidator
447
452
  # the loop stops according to the stop conditions
448
453
  iteration = 0
449
454
  loop do
450
-
451
455
  # stop condition 1
452
456
  break if no_clusters != 0 && clusters.length == no_clusters
453
457
 
454
- iteration = iteration + 1
458
+ iteration += iteration
455
459
  $stderr.puts "\nIteration #{iteration}" if debug
456
460
 
457
461
  min_distance = 100_000_000
@@ -460,7 +464,9 @@ module GeneValidator
460
464
 
461
465
  clusters[0..clusters.length - 2].each_with_index do |_item, i|
462
466
  dist = clusters[i].distance(clusters[i + 1], distance_method)
463
- $stderr.puts "distance between clusters #{i} and #{i + 1} is #{dist}" if debug
467
+ if debug
468
+ $stderr.puts "distance btwn clusters #{i} and #{i + 1} is #{dist}"
469
+ end
464
470
  current_density = clusters[i].density + clusters[i + 1].density
465
471
  if dist < min_distance
466
472
  min_distance = dist
@@ -509,9 +515,7 @@ module GeneValidator
509
515
  max_density = 0
510
516
  max_density_cluster = 0
511
517
 
512
- if clusters.nil?
513
- nil
514
- end
518
+ nil if clusters.nil?
515
519
 
516
520
  clusters.each_with_index do |item, i|
517
521
  if item.density > max_density