genevalidator 1.6.12 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +30 -1
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +13 -12
  5. data/Gemfile +4 -1
  6. data/Gemfile.lock +135 -0
  7. data/README.md +104 -122
  8. data/Rakefile +377 -5
  9. data/aux/gv_results.slim +155 -0
  10. data/aux/html_files/css/gv.compiled.min.css +8 -0
  11. data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
  12. data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
  13. data/aux/{files → html_files}/css/src/style.css +0 -0
  14. data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
  15. data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
  16. data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
  17. data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
  18. data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
  19. data/aux/{files → html_files}/img/gene.png +0 -0
  20. data/aux/html_files/js/gv.compiled.min.js +1 -0
  21. data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
  22. data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
  23. data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
  24. data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
  25. data/aux/{files → html_files}/js/src/plots.js +1 -1
  26. data/aux/{files → html_files}/js/src/script.js +0 -0
  27. data/aux/{files → html_files}/json/.gitkeep +0 -0
  28. data/bin/genevalidator +393 -56
  29. data/exemplar_data/README.md +60 -0
  30. data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
  31. data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
  32. data/genevalidator.gemspec +35 -20
  33. data/install.sh +92 -0
  34. data/lib/genevalidator.rb +171 -56
  35. data/lib/genevalidator/arg_validation.rb +26 -55
  36. data/lib/genevalidator/blast.rb +44 -99
  37. data/lib/genevalidator/clusterization.rb +18 -22
  38. data/lib/genevalidator/exceptions.rb +17 -17
  39. data/lib/genevalidator/ext/array.rb +21 -4
  40. data/lib/genevalidator/get_raw_sequences.rb +32 -31
  41. data/lib/genevalidator/hsp.rb +31 -2
  42. data/lib/genevalidator/json_to_gv_results.rb +38 -122
  43. data/lib/genevalidator/output.rb +158 -172
  44. data/lib/genevalidator/output_files.rb +134 -0
  45. data/lib/genevalidator/pool.rb +2 -5
  46. data/lib/genevalidator/query.rb +1 -1
  47. data/lib/genevalidator/tabular_parser.rb +8 -29
  48. data/lib/genevalidator/validation.rb +48 -90
  49. data/lib/genevalidator/validation_alignment.rb +64 -75
  50. data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
  51. data/lib/genevalidator/validation_duplication.rb +85 -84
  52. data/lib/genevalidator/validation_gene_merge.rb +46 -35
  53. data/lib/genevalidator/validation_length_cluster.rb +18 -15
  54. data/lib/genevalidator/validation_length_rank.rb +19 -15
  55. data/lib/genevalidator/validation_maker_qi.rb +13 -12
  56. data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
  57. data/lib/genevalidator/validation_report.rb +1 -1
  58. data/lib/genevalidator/validation_test.rb +1 -1
  59. data/lib/genevalidator/version.rb +1 -1
  60. data/test/overall.rb +1 -1
  61. data/test/test_all_validations.rb +36 -24
  62. data/test/test_blast.rb +39 -24
  63. data/test/test_clusterization_2d.rb +4 -4
  64. data/test/test_helper.rb +2 -2
  65. data/test/test_query.rb +16 -20
  66. data/test/test_validation_open_reading_frame.rb +122 -122
  67. data/test/test_validations.rb +12 -10
  68. metadata +94 -79
  69. data/aux/files/css/genevalidator.compiled.min.css +0 -16
  70. data/aux/files/js/genevalidator.compiled.min.js +0 -28
  71. data/aux/json_footer.erb +0 -8
  72. data/aux/json_header.erb +0 -19
  73. data/aux/json_query.erb +0 -15
  74. data/aux/template_footer.erb +0 -8
  75. data/aux/template_header.erb +0 -19
  76. data/aux/template_query.erb +0 -14
  77. data/data/README.md +0 -57
  78. data/data/mrna_data.fasta.blast_tabular +0 -3567
  79. data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
  80. data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
  81. data/data/mrna_data.fasta.blast_xml +0 -39800
  82. data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
  83. data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
  84. data/data/mrna_data.fasta.json +0 -1
  85. data/data/protein_data.fasta.blast_tabular +0 -3278
  86. data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
  87. data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
  88. data/data/protein_data.fasta.blast_xml +0 -26228
  89. data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
  90. data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
  91. data/data/protein_data.fasta.json +0 -1
@@ -0,0 +1,60 @@
1
+ # Running GeneValidator with sample data
2
+
3
+ Here, we walk through the steps involved in analysing some sample data with GeneValidator. There are two options on how to run genevalidator - the second option is faster with larger input files.
4
+
5
+ ## Expected Results
6
+
7
+ <strong>protein_data.fa</strong> [See here](http://wurmlab.github.io/tools/genevalidator/examplar_data/protein_input/)
8
+ <strong>mrna_data.fa</strong> [See here](http://wurmlab.github.io/tools/genevalidator/examplar_data/genetic_input/)
9
+
10
+ ##### Running GeneValidator with the included SwissProt Database, with four threads
11
+
12
+ ```bash
13
+ # Protein data
14
+ $ genevalidator -n 4 protein_data.fa
15
+
16
+ # MRNA data
17
+ $ genevalidator -n 4 mrna_data.fa
18
+ ```
19
+
20
+ This will produce a folder that will contain your result files.
21
+
22
+ ##### Running GeneValidator with a pre-computed BLAST XML file
23
+
24
+ For protein_data.fa:
25
+
26
+ ```
27
+ blastp -db DATABASE_PATH -num_threads 4 -out protein_data.blast.xml -query protein_data.fa -outfmt 5
28
+
29
+ # Run GeneValidator
30
+ genevalidator -d DATABASE_PATH -n 4 -x protein_data.blast.xml protein_data.fa
31
+ ```
32
+
33
+ For mrna_data.fa:
34
+
35
+ ```
36
+ blastx -db DATABASE_PATH -num_threads 4 -out mrna_data.blast.xml -query mrna_data.fa -outfmt 5
37
+
38
+ # Run GeneValidator
39
+ genevalidator -d DATABASE_PATH -n 4 -x mrna_data.blast.xml mrna_data.fa
40
+ ```
41
+
42
+ ##### Running GeneValidator with a pre-computed BLAST tabular file
43
+
44
+ For protein_data.fa:
45
+
46
+ ```
47
+ blastp -db DATABASE_PATH -num_threads 4 -out protein_data.blast.tsv -query protein_data.fa -outfmt '7 qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq'
48
+
49
+ # Run GeneValidator
50
+ genevalidator -d DATABASE_PATH -n 4 -t protein_data.blast.tsv --blast_tabular_options 'qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq' protein_data.fa
51
+ ```
52
+
53
+ For mrna_data.fa:
54
+
55
+ ```
56
+ blastx -db DATABASE_PATH -num_threads 4 -out mrna_data.blast.tsv -query mrna_data.fa -outfmt '7 qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq'
57
+
58
+ # Run GeneValidator
59
+ genevalidator -d DATABASE_PATH -n 4 -t mrna_data.blast.tsv --blast_tabular_options 'qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq' mrna_data.fa
60
+ ```
@@ -235,4 +235,4 @@ CCATGCCGGAGCATCAGTAGATCTTGCCATCTTCTCCCTTCATCTGGCAGGTGTCTCCTC
235
235
  CATCCTCGGAGCAATTAACTTTATCACCACAGCCATCAACATGAAACCACCTGCCCTCTC
236
236
  ACAATACCAAACCCCCCTATTCGTTTGATCCGTCTTAATTACCGCCATCCTTCTTCTCCT
237
237
  TTCCCTCCCAGTTCTCGCCGCTGGTATTACAATGCTTCTAACAGATCGAAATCTAAACAC
238
- TACATTCTTCGACCCTGCAGGGGGCGGAGACCCAATTTTATACCAACACTTA
238
+ TACATTCTTCGACCCTGCAGGGGGCGGAGACCCAATTTTATACCAACACTTA
@@ -1,12 +1,10 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'genevalidator/version'
5
4
 
6
5
  Gem::Specification.new do |s|
7
- # meta
8
6
  s.name = 'genevalidator'
9
- s.version = GeneValidator::VERSION
7
+ s.version = GeneValidator::VERSION
10
8
  s.authors = ['Monica Dragan', 'Ismail Moghul', 'Anurag Priyam',
11
9
  'Yannick Wurm']
12
10
  s.email = 'y.wurm@qmul.ac.uk'
@@ -14,19 +12,27 @@ Gem::Specification.new do |s|
14
12
  s.license = 'AGPL'
15
13
  s.summary = 'Identifying problems with gene predictions.'
16
14
  s.description = 'The tool validates the input predicted genes and provides' \
17
- ' useful information (length validation, gene merge'\
15
+ ' useful information (length validation, gene merge' \
18
16
  ' validation, sequence duplication checking, ORF finding)' \
19
17
  ' based on the similarities to genes in public databases.'
18
+ s.required_ruby_version = '>= 2.2.0'
20
19
 
21
- s.required_ruby_version = '>= 2.0.0'
22
- s.add_development_dependency 'bundler', '~> 1.6'
23
- s.add_development_dependency 'rake', '~>10.3'
24
- s.add_development_dependency 'yard', '~> 0.8'
25
- s.add_development_dependency 'codeclimate-test-reporter', '~> 0.4', '>= 0.4.7'
26
- s.add_development_dependency('minitest', '~> 5.4')
27
- s.add_dependency('bio', '~> 1.4')
28
- s.add_dependency('bio-blastxmlparser', '~>2.0')
29
- s.add_dependency('statsample', '2.0.1')
20
+ s.add_development_dependency 'minitest', '~> 5.10'
21
+ s.add_development_dependency 'rake', '~> 12.3'
22
+ s.add_development_dependency 'yard', '~> 0.9.11'
23
+
24
+ s.add_dependency 'bio', '~> 1.4'
25
+ s.add_dependency 'bio-blastxmlparser', '~> 2.0'
26
+ s.add_dependency 'genevalidatorapp', '~> 2.1.3'
27
+ s.add_dependency 'rack', '~> 2.0'
28
+ s.add_dependency 'slim', '~>3.0'
29
+ s.add_dependency 'statsample', '2.1.0'
30
+
31
+ # Adding mechanize gem just to silence a message on load.
32
+ # This is due to the Statsample gem
33
+ # See https://github.com/SciRuby/daru/issues/404
34
+ # See https://github.com/SciRuby/statsample/pull/69
35
+ s.add_dependency 'mechanize', '2.7.5'
30
36
 
31
37
  s.files = `git ls-files -z`.split("\x0")
32
38
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
@@ -35,15 +41,24 @@ Gem::Specification.new do |s|
35
41
 
36
42
  s.post_install_message = <<INFO
37
43
 
38
- ------------------------------------------------------------------------
39
- Thank you for validating your gene predictions with GeneValidator!
44
+ ----------------------------------------------------------------------------
45
+ Thank you for validating your gene predictions with GeneValidator!
46
+
47
+ ==> To launch GeneValidator execute 'genevalidator' from command line.
48
+
49
+ genevalidator [OPTIONAL ARGUMENTS] INPUT_FILE
50
+
51
+ See 'genevalidator --help' for more information
52
+
53
+ ==> To launch GeneValidator as a web application execute 'genevalidator' from command line.
54
+
55
+ genevalidator app [OPTIONAL ARGUMENTS]
40
56
 
41
- To launch GeneValidator execute 'genevalidator' from command line.
57
+ See 'genevalidator app --help' for more information
42
58
 
43
- $ genevalidator [options] FASTA_FILE
59
+ ==> Visit https://wurmlab.github.io/tools/genevalidator/ for more information.
44
60
 
45
- Visit https://github.com/wurmlab/GeneValidator for more information.
46
- ------------------------------------------------------------------------
61
+ ----------------------------------------------------------------------------
47
62
 
48
63
  INFO
49
64
  end
@@ -0,0 +1,92 @@
1
+ #!/bin/sh
2
+
3
+ ## USAGE: bash install.sh $INSTALL_DIR
4
+ ## $ bash install.sh $INSTALL_DIR
5
+
6
+ set -eu
7
+
8
+ # OS detection
9
+ KERNEL="$(uname -s | tr '[:upper:]' '[:lower:]')"
10
+
11
+ if [ "$KERNEL" = "darwin" ]; then
12
+ PLATFORM='osx'
13
+ elif [ "$KERNEL" = "linux" ]; then
14
+ ARCH=$(uname -m)
15
+ if [ "$ARCH" = "x86_64" ]; then
16
+ PLATFORM='linux-x86_64'
17
+ else
18
+ PLATFORM='linux-x86'
19
+ fi
20
+ fi
21
+
22
+ # If there is an argument then that is where GV will be installed
23
+ if [ "$0" = 'sh' ]; then
24
+ # I.e. when piping from curl
25
+ INSTALL_DIR=$PWD/genevalidator
26
+ elif [ "$0" = 'install.sh' ]; then
27
+ # I.e. when running directly
28
+ INSTALL_DIR=$PWD/genevalidator
29
+ else
30
+ INSTALL_DIR="$0"
31
+ fi
32
+
33
+ GV_URL=$(curl -s https://api.github.com/repos/wurmlab/genevalidator/releases/latest \
34
+ | grep browser_download_url \
35
+ | grep -i $PLATFORM \
36
+ | cut -d '"' -f 4)
37
+
38
+ echo >&2 "==> Installing GeneValidator to:"
39
+ echo >&2 " ${INSTALL_DIR}"
40
+ echo >&2
41
+
42
+ mkdir "${INSTALL_DIR}"
43
+ curl -SL "$GV_URL" | tar zxf - -C "${INSTALL_DIR}" --strip-components 1
44
+
45
+ echo >&2
46
+ echo >&2 "==> GeneValidator successfully installed."
47
+
48
+ ### Check which SHELL and then test different profile files
49
+ case $SHELL in
50
+ */zsh)
51
+ # assume Zsh
52
+ if test -e "${HOME}/.zshrc"; then
53
+ DOT_FILE=${HOME}/.zshrc
54
+ elif test -e "${HOME}/.zprofile"; then
55
+ DOT_FILE=${HOME}/.zprofile
56
+ elif test -e "${HOME}/.profile"; then
57
+ DOT_FILE=${HOME}/.profile
58
+ fi
59
+ ;;
60
+ */bash)
61
+ # assume Bash
62
+ if test -e "${HOME}/.bashrc"; then
63
+ DOT_FILE=${HOME}/.bashrc
64
+ elif test -e "${HOME}/.bash_profile"; then
65
+ DOT_FILE=${HOME}/.bash_profile
66
+ elif test -e "${HOME}/.profile"; then
67
+ DOT_FILE=${HOME}/.profile
68
+ fi
69
+ ;;
70
+ *)
71
+ if test -e "${HOME}/.profile"; then
72
+ DOT_FILE=${HOME}/.profile
73
+ fi
74
+ esac
75
+
76
+
77
+ if [ -z ${DOT_FILE+x} ]; then
78
+ # DOT File hasn't been set.
79
+ echo >&2
80
+ echo >&2 '==> No profile files were found.'
81
+ echo >&2 ' Please create one and add the following line to that file:'
82
+ echo >&2
83
+ echo >&2 ' export PATH="'"${INSTALL_DIR}"'/bin:${PATH}"'
84
+ else
85
+ echo >&2 'export PATH="'"${INSTALL_DIR}"'/bin:${PATH}"' >> "${DOT_FILE}"
86
+ echo >&2
87
+ echo >&2 "==> Added GeneValidator to your PATH in ${DOT_FILE}"
88
+ echo >&2
89
+ echo >&2 "==> Run \`genevalidator -h\` in a new window to get started."
90
+ fi
91
+
92
+ echo >&2
@@ -5,61 +5,36 @@ require 'genevalidator/arg_validation'
5
5
  require 'genevalidator/blast'
6
6
  require 'genevalidator/exceptions'
7
7
  require 'genevalidator/get_raw_sequences'
8
+ require 'genevalidator/json_to_gv_results'
8
9
  require 'genevalidator/output'
10
+ require 'genevalidator/output_files'
9
11
  require 'genevalidator/tabular_parser'
10
12
  require 'genevalidator/validation'
11
13
 
12
14
  # Top level module / namespace.
13
15
  module GeneValidator
14
16
  class << self
15
- attr_accessor :opt, :config, :overview
17
+ attr_accessor :opt, :config, :overview, :dirs
16
18
  attr_reader :raw_seq_file_index
17
19
  attr_reader :raw_seq_file_load
18
20
  # array of indexes for the start offsets of each query in the fasta file
19
21
  attr_reader :query_idx
20
- attr_accessor :mutex, :mutex_html, :mutex_json, :mutex_array
22
+ attr_accessor :mutex, :mutex_array
21
23
 
22
- def init(opt, start_idx = 1, summary = true)
23
- $stderr.puts 'Analysing input arguments'
24
+ def init(opt, start_idx = 1)
25
+ warn '==> Analysing input arguments'
24
26
  @opt = opt
25
27
  GVArgValidation.validate_args # validates @opt
28
+ number_of_sequences = index_the_input
26
29
 
27
- @config = {
28
- idx: 0,
29
- start_idx: start_idx,
30
- summary: summary,
31
-
32
- type: BlastUtils.guess_sequence_type_from_input_file,
33
- filename: File.basename(@opt[:input_fasta_file]),
34
- html_path: "#{@opt[:input_fasta_file]}.html",
35
- json_file: File.join(File.dirname(@opt[:input_fasta_file]),
36
- "#{File.basename(@opt[:input_fasta_file])}.json"),
37
- plot_dir: "#{@opt[:input_fasta_file]}.html/files/json",
38
- aux: File.expand_path(File.join(File.dirname(__FILE__), '../aux')),
39
-
40
- json_output: [],
41
- run_no: 0,
42
- output_max: 2500 # max no. of queries in the output file
43
- }
44
-
45
- @overview = {
46
- no_queries: 0,
47
- scores: [],
48
- good_scores: 0,
49
- bad_scores: 0,
50
- nee: 0,
51
- no_mafft: 0,
52
- no_internet: 0,
53
- map_errors: Hash.new(0),
54
- run_time: Hash.new(Pair1.new(0, 0))
55
- }
30
+ @config = setup_config(start_idx, number_of_sequences)
31
+ @dirs = setup_dirnames(@opt[:input_fasta_file])
56
32
 
57
33
  @mutex = Mutex.new
58
34
  @mutex_array = Mutex.new
59
- @mutex_html = Mutex.new
60
- @mutex_json = Mutex.new
61
- create_output_folder
62
- index_the_input
35
+
36
+ resume_from_previous_run(opt[:resumable]) unless opt[:resumable].nil?
37
+
63
38
  RawSequences.index_raw_seq_file if @opt[:raw_sequences]
64
39
  end
65
40
 
@@ -69,6 +44,8 @@ module GeneValidator
69
44
  # Run BLAST on all sequences (generates @opt[:blast_xml_file])
70
45
  # if no BLAST OUTPUT file provided...
71
46
  unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
47
+ blast_xml_fname = "#{dirs[:filename]}.blast_xml"
48
+ opt[:blast_xml_file] = File.join(dirs[:tmp_dir], blast_xml_fname)
72
49
  BlastUtils.run_blast_on_input_file
73
50
  end
74
51
  # Obtain fasta file of all BLAST hits if running align or dup validations
@@ -78,19 +55,110 @@ module GeneValidator
78
55
  end
79
56
  # Run Validations
80
57
  iterator = parse_blast_output_file
81
- (Validations.new).run_validations(iterator)
58
+ Validations.new.run_validations(iterator)
59
+ produce_output
60
+ print_directories_locations
61
+ end
62
+
63
+ ##
64
+ # Params:
65
+ # +output+: filename or stream, according to the type
66
+ # +type+: file or stream
67
+ # Returns an iterator..
68
+ def parse_blast_output_file
69
+ if @opt[:blast_xml_file]
70
+ Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
71
+ else
72
+ TabularParser.new
73
+ end
74
+ ## TODO: Add a Rescue statement - e.g. if unable to create the Object...
75
+ end
76
+
77
+ # Also called by json_to_gv script
78
+ def setup_dirnames(input_file)
79
+ fname = File.basename(input_file, File.extname(input_file))
80
+ out_dir = setup_output_dir(fname)
81
+ { filename: fname,
82
+ output_dir: out_dir,
83
+ tmp_dir: File.join(out_dir, 'tmp'),
84
+ json_dir: File.join(out_dir, 'tmp/json'),
85
+ html_file: File.join(out_dir, "#{fname}_results*.html"),
86
+ json_file: File.join(out_dir, "#{fname}_results.json"),
87
+ csv_file: File.join(out_dir, "#{fname}_results.csv"),
88
+ summary_file: File.join(out_dir, "#{fname}_summary.csv"),
89
+ fasta_file: File.join(out_dir, "#{fname}_results.fa"),
90
+ aux_dir: File.expand_path('../aux', __dir__) }
91
+ end
92
+
93
+ def extract_input_fasta_sequence(index)
94
+ start_offset = @query_idx[index + 1] - @query_idx[index]
95
+ end_offset = @query_idx[index]
96
+ IO.binread(@opt[:input_fasta_file], start_offset, end_offset)
97
+ end
98
+
99
+ def produce_output
100
+ @overview = Output.generate_overview(@config[:json_output],
101
+ @opt[:min_blast_hits])
102
+ eval_text = Output.generate_evaluation_text(@overview)
103
+ Output.print_console_footer(eval_text, @opt)
104
+
105
+ output_files = OutputFiles.new
106
+ output_files.write_json
107
+ output_files.write_html(eval_text)
108
+ output_files.write_csv
109
+ output_files.write_summary
110
+ output_files.print_best_fasta
111
+ end
112
+
113
+ private
114
+
115
+ def setup_config(start_idx, seq_length)
116
+ {
117
+ idx: 0,
118
+ start_idx: start_idx,
119
+
120
+ type: BlastUtils.guess_sequence_type_from_input_file,
82
121
 
83
- Output.write_json_file(@config[:json_output], @config[:json_file])
84
- Output.print_footer(@overview, @config)
122
+ json_output: Array.new(seq_length),
123
+ run_no: 0,
124
+ output_max: 2500 # max no. of queries in the output html file
125
+ }
85
126
  end
86
127
 
87
128
  ##
88
129
  # Creates the output folder and copies the auxiliary folders to this folder
89
- def create_output_folder(output_dir = @config[:html_path],
90
- aux_dir = @config[:aux])
130
+ def setup_output_dir(fname)
131
+ dir_name = "#{fname}_" + Time.now.strftime('%Y_%m_%d_%H_%M_%S')
132
+ default_outdir = File.join(Dir.pwd, dir_name)
133
+ output_dir = @opt[:output_dir].nil? ? default_outdir : @opt[:output_dir]
134
+ assert_output_dir_does_not_exist(output_dir)
91
135
  Dir.mkdir(output_dir)
92
- aux_files = File.join(aux_dir, 'files/')
93
- FileUtils.cp_r(aux_files, output_dir)
136
+ Dir.mkdir(File.join(output_dir, 'tmp'))
137
+ cp_html_files(output_dir)
138
+ output_dir
139
+ end
140
+
141
+ def assert_output_dir_does_not_exist(output_dir)
142
+ return unless Dir.exist?(output_dir)
143
+ FileUtils.rm_r(output_dir) if @opt[:force_rewrite]
144
+ return if @opt[:force_rewrite]
145
+ warn "The output directory (#{output_dir}) already exists."
146
+ warn ''
147
+ warn 'Please remove this directory before continuing.'
148
+ warn 'Alternatively, you rerun GeneValidator with the `--force` argument,'
149
+ warn 'which rewrites over any previous output.'
150
+ exit 1
151
+ end
152
+
153
+ def cp_html_files(output_dir)
154
+ if @opt[:output_formats].include? 'html'
155
+ aux_files = File.expand_path('../aux/html_files/', __dir__)
156
+ FileUtils.cp_r(aux_files, output_dir)
157
+ FileUtils.ln_s(File.join('..', 'html_files', 'json'),
158
+ File.join(output_dir, 'tmp', 'json'))
159
+ else
160
+ Dir.mkdir(File.join(output_dir, 'tmp', 'json'))
161
+ end
94
162
  end
95
163
 
96
164
  ##
@@ -99,22 +167,69 @@ module GeneValidator
99
167
  # start and end positions of each query.
100
168
  def index_the_input
101
169
  fasta_content = IO.binread(@opt[:input_fasta_file])
102
- @query_idx = fasta_content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
170
+ @query_idx = fasta_content.enum_for(:scan, /(>[^>]+)/).map do
171
+ Regexp.last_match.begin(0)
172
+ end
103
173
  @query_idx.push(fasta_content.length)
174
+ @query_idx.length - 1
104
175
  end
105
176
 
106
- ##
107
- # Params:
108
- # +output+: filename or stream, according to the type
109
- # +type+: file or stream
110
- # Returns an iterator..
111
- def parse_blast_output_file
112
- if @opt[:blast_xml_file]
113
- Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
114
- else
115
- TabularParser.new
177
+ def print_directories_locations
178
+ warn '==> GeneValidator output files have been saved to:'
179
+ warn " #{File.expand_path(@dirs[:output_dir])}"
180
+ end
181
+
182
+ def resume_from_previous_run(prev_dir)
183
+ prev_tmp_dir = File.join(prev_dir, 'tmp')
184
+ return unless Dir.exist? prev_tmp_dir
185
+ copy_blast_xml_files(prev_tmp_dir)
186
+ copy_raw_seq_files(prev_tmp_dir)
187
+ copy_prev_json_output(prev_tmp_dir)
188
+ end
189
+
190
+ def copy_blast_xml_files(prev_tmp_dir)
191
+ return if @opt[:blast_xml_file] || @opt[:blast_tabular_file]
192
+ prev_blast_xml = Dir[File.join(prev_tmp_dir, '*blast_xml')]
193
+ return if prev_blast_xml.empty?
194
+ blast_xml_fname = "#{@dirs[:filename]}.blast_xml"
195
+ @opt[:blast_xml_file] = File.join(@dirs[:tmp_dir], blast_xml_fname)
196
+ FileUtils.cp(prev_blast_xml[0], @opt[:blast_xml_file])
197
+ end
198
+
199
+ def copy_raw_seq_files(prev_tmp_dir)
200
+ return if @opt[:raw_sequences]
201
+ return unless @opt[:validations].include?('align') ||
202
+ @opt[:validations].include?('dup')
203
+ prev_raw_seq = Dir[File.join(prev_tmp_dir, '*raw_seq')]
204
+ return if prev_raw_seq.empty?
205
+ raw_seq_fname = "#{@dirs[:filename]}.blast_xml.raw_seq"
206
+ @opt[:raw_sequences] = File.join(@dirs[:tmp_dir], raw_seq_fname)
207
+ FileUtils.cp(prev_raw_seq[0], @opt[:raw_sequences])
208
+ end
209
+
210
+ def copy_prev_json_output(prev_tmp_dir)
211
+ prev_json_dir = File.join(prev_tmp_dir, 'json')
212
+ return unless Dir.exist? prev_json_dir
213
+ all_jsons = Dir[File.join(prev_json_dir, '*.json')]
214
+ FileUtils.cp(all_jsons, @dirs[:json_dir])
215
+ overview_json = Dir[File.join(prev_json_dir, 'overview.json')]
216
+ data_jsons = all_jsons - overview_json
217
+ parse_prev_json(data_jsons)
218
+ end
219
+
220
+ def parse_prev_json(data_jsons)
221
+ data_jsons.each do |json|
222
+ json_contents = File.read(File.expand_path(json))
223
+ data = JSON.parse(json_contents, symbolize_names: true)
224
+ idx = json.match(/(\d+).json/)[1].to_i - 1
225
+ @config[:json_output][idx] = data
226
+ print_prev_json_to_console(data)
116
227
  end
117
- ## TODO: Add a Rescue statement - e.g. if unable to create the Object...
228
+ end
229
+
230
+ def print_prev_json_to_console(data)
231
+ JsonToGVResults.print_console_header(data)
232
+ JsonToGVResults.print_output_console(data)
118
233
  end
119
234
  end
120
235
  end