genevalidator 1.6.12 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +30 -1
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +13 -12
  5. data/Gemfile +4 -1
  6. data/Gemfile.lock +135 -0
  7. data/README.md +104 -122
  8. data/Rakefile +377 -5
  9. data/aux/gv_results.slim +155 -0
  10. data/aux/html_files/css/gv.compiled.min.css +8 -0
  11. data/aux/{files → html_files}/css/src/bootstrap.min.css +0 -0
  12. data/aux/{files → html_files}/css/src/font-awesome.min.css +0 -0
  13. data/aux/{files → html_files}/css/src/style.css +0 -0
  14. data/aux/{files → html_files}/fonts/FontAwesome.otf +0 -0
  15. data/aux/{files → html_files}/fonts/fontawesome-webfont.eot +0 -0
  16. data/aux/{files → html_files}/fonts/fontawesome-webfont.svg +0 -0
  17. data/aux/{files → html_files}/fonts/fontawesome-webfont.ttf +0 -0
  18. data/aux/{files → html_files}/fonts/fontawesome-webfont.woff +0 -0
  19. data/aux/{files → html_files}/img/gene.png +0 -0
  20. data/aux/html_files/js/gv.compiled.min.js +1 -0
  21. data/aux/{files → html_files}/js/src/bootstrap.min.js +0 -0
  22. data/aux/{files → html_files}/js/src/d3.v3.min.js +0 -0
  23. data/aux/{files → html_files}/js/src/jquery-2.1.1.min.js +0 -0
  24. data/aux/{files → html_files}/js/src/jquery.tablesorter.min.js +0 -0
  25. data/aux/{files → html_files}/js/src/plots.js +1 -1
  26. data/aux/{files → html_files}/js/src/script.js +0 -0
  27. data/aux/{files → html_files}/json/.gitkeep +0 -0
  28. data/bin/genevalidator +393 -56
  29. data/exemplar_data/README.md +60 -0
  30. data/{data/mrna_data.fasta → exemplar_data/mrna_data.fa} +1 -1
  31. data/{data/protein_data.fasta → exemplar_data/protein_data.fa} +0 -0
  32. data/genevalidator.gemspec +35 -20
  33. data/install.sh +92 -0
  34. data/lib/genevalidator.rb +171 -56
  35. data/lib/genevalidator/arg_validation.rb +26 -55
  36. data/lib/genevalidator/blast.rb +44 -99
  37. data/lib/genevalidator/clusterization.rb +18 -22
  38. data/lib/genevalidator/exceptions.rb +17 -17
  39. data/lib/genevalidator/ext/array.rb +21 -4
  40. data/lib/genevalidator/get_raw_sequences.rb +32 -31
  41. data/lib/genevalidator/hsp.rb +31 -2
  42. data/lib/genevalidator/json_to_gv_results.rb +38 -122
  43. data/lib/genevalidator/output.rb +158 -172
  44. data/lib/genevalidator/output_files.rb +134 -0
  45. data/lib/genevalidator/pool.rb +2 -5
  46. data/lib/genevalidator/query.rb +1 -1
  47. data/lib/genevalidator/tabular_parser.rb +8 -29
  48. data/lib/genevalidator/validation.rb +48 -90
  49. data/lib/genevalidator/validation_alignment.rb +64 -75
  50. data/lib/genevalidator/validation_blast_reading_frame.rb +13 -9
  51. data/lib/genevalidator/validation_duplication.rb +85 -84
  52. data/lib/genevalidator/validation_gene_merge.rb +46 -35
  53. data/lib/genevalidator/validation_length_cluster.rb +18 -15
  54. data/lib/genevalidator/validation_length_rank.rb +19 -15
  55. data/lib/genevalidator/validation_maker_qi.rb +13 -12
  56. data/lib/genevalidator/validation_open_reading_frame.rb +16 -13
  57. data/lib/genevalidator/validation_report.rb +1 -1
  58. data/lib/genevalidator/validation_test.rb +1 -1
  59. data/lib/genevalidator/version.rb +1 -1
  60. data/test/overall.rb +1 -1
  61. data/test/test_all_validations.rb +36 -24
  62. data/test/test_blast.rb +39 -24
  63. data/test/test_clusterization_2d.rb +4 -4
  64. data/test/test_helper.rb +2 -2
  65. data/test/test_query.rb +16 -20
  66. data/test/test_validation_open_reading_frame.rb +122 -122
  67. data/test/test_validations.rb +12 -10
  68. metadata +94 -79
  69. data/aux/files/css/genevalidator.compiled.min.css +0 -16
  70. data/aux/files/js/genevalidator.compiled.min.js +0 -28
  71. data/aux/json_footer.erb +0 -8
  72. data/aux/json_header.erb +0 -19
  73. data/aux/json_query.erb +0 -15
  74. data/aux/template_footer.erb +0 -8
  75. data/aux/template_header.erb +0 -19
  76. data/aux/template_query.erb +0 -14
  77. data/data/README.md +0 -57
  78. data/data/mrna_data.fasta.blast_tabular +0 -3567
  79. data/data/mrna_data.fasta.blast_tabular.raw_seq +0 -53998
  80. data/data/mrna_data.fasta.blast_tabular.raw_seq.idx +0 -5440
  81. data/data/mrna_data.fasta.blast_xml +0 -39800
  82. data/data/mrna_data.fasta.blast_xml.raw_seq +0 -2554
  83. data/data/mrna_data.fasta.blast_xml.raw_seq.idx +0 -3127
  84. data/data/mrna_data.fasta.json +0 -1
  85. data/data/protein_data.fasta.blast_tabular +0 -3278
  86. data/data/protein_data.fasta.blast_tabular.raw_seq +0 -61295
  87. data/data/protein_data.fasta.blast_tabular.raw_seq.idx +0 -4438
  88. data/data/protein_data.fasta.blast_xml +0 -26228
  89. data/data/protein_data.fasta.blast_xml.raw_seq +0 -9803
  90. data/data/protein_data.fasta.blast_xml.raw_seq.idx +0 -1777
  91. data/data/protein_data.fasta.json +0 -1
@@ -0,0 +1,60 @@
1
+ # Running GeneValidator with sample data
2
+
3
+ Here, we walk through the steps involved in analysing some sample data with GeneValidator. There are two ways to run GeneValidator: letting it run BLAST itself, or supplying a pre-computed BLAST output file (XML or tabular) - the second option is faster with larger input files.
4
+
5
+ ## Expected Results
6
+
7
+ <strong>protein_data.fa</strong> [See here](http://wurmlab.github.io/tools/genevalidator/examplar_data/protein_input/)
8
+ <strong>mrna_data.fa</strong> [See here](http://wurmlab.github.io/tools/genevalidator/examplar_data/genetic_input/)
9
+
10
+ ##### Running GeneValidator with the included SwissProt Database, with four threads
11
+
12
+ ```bash
13
+ # Protein data
14
+ $ genevalidator -n 4 protein_data.fa
15
+
16
+ # mRNA data
17
+ $ genevalidator -n 4 mrna_data.fa
18
+ ```
19
+
20
+ This will produce a folder that will contain your result files.
21
+
22
+ ##### Running GeneValidator with a pre-computed BLAST XML file
23
+
24
+ For protein_data.fa:
25
+
26
+ ```
27
+ blastp -db DATABASE_PATH -num_threads 4 -out protein_data.blast.xml -query protein_data.fa -outfmt 5
28
+
29
+ # Run GeneValidator
30
+ genevalidator -d DATABASE_PATH -n 4 -x protein_data.blast.xml protein_data.fa
31
+ ```
32
+
33
+ For mrna_data.fa:
34
+
35
+ ```
36
+ blastx -db DATABASE_PATH -num_threads 4 -out mrna_data.blast.xml -query mrna_data.fa -outfmt 5
37
+
38
+ # Run GeneValidator
39
+ genevalidator -d DATABASE_PATH -n 4 -x mrna_data.blast.xml mrna_data.fa
40
+ ```
41
+
42
+ ##### Running GeneValidator with a pre-computed BLAST tabular file
43
+
44
+ For protein_data.fa:
45
+
46
+ ```
47
+ blastp -db DATABASE_PATH -num_threads 4 -out protein_data.blast.tsv -query protein_data.fa -outfmt '7 qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq'
48
+
49
+ # Run GeneValidator
50
+ genevalidator -d DATABASE_PATH -n 4 -t protein_data.blast.tsv --blast_tabular_options 'qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq' protein_data.fa
51
+ ```
52
+
53
+ For mrna_data.fa:
54
+
55
+ ```
56
+ blastx -db DATABASE_PATH -num_threads 4 -out mrna_data.blast.tsv -query mrna_data.fa -outfmt '7 qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq'
57
+
58
+ # Run GeneValidator
59
+ genevalidator -d DATABASE_PATH -n 4 -t mrna_data.blast.tsv --blast_tabular_options 'qseqid sseqid sacc slen qstart qend sstart send length qframe pident nident evalue qseq sseq' mrna_data.fa
60
+ ```
@@ -235,4 +235,4 @@ CCATGCCGGAGCATCAGTAGATCTTGCCATCTTCTCCCTTCATCTGGCAGGTGTCTCCTC
235
235
  CATCCTCGGAGCAATTAACTTTATCACCACAGCCATCAACATGAAACCACCTGCCCTCTC
236
236
  ACAATACCAAACCCCCCTATTCGTTTGATCCGTCTTAATTACCGCCATCCTTCTTCTCCT
237
237
  TTCCCTCCCAGTTCTCGCCGCTGGTATTACAATGCTTCTAACAGATCGAAATCTAAACAC
238
- TACATTCTTCGACCCTGCAGGGGGCGGAGACCCAATTTTATACCAACACTTA
238
+ TACATTCTTCGACCCTGCAGGGGGCGGAGACCCAATTTTATACCAACACTTA
@@ -1,12 +1,10 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'genevalidator/version'
5
4
 
6
5
  Gem::Specification.new do |s|
7
- # meta
8
6
  s.name = 'genevalidator'
9
- s.version = GeneValidator::VERSION
7
+ s.version = GeneValidator::VERSION
10
8
  s.authors = ['Monica Dragan', 'Ismail Moghul', 'Anurag Priyam',
11
9
  'Yannick Wurm']
12
10
  s.email = 'y.wurm@qmul.ac.uk'
@@ -14,19 +12,27 @@ Gem::Specification.new do |s|
14
12
  s.license = 'AGPL'
15
13
  s.summary = 'Identifying problems with gene predictions.'
16
14
  s.description = 'The tool validates the input predicted genes and provides' \
17
- ' useful information (length validation, gene merge'\
15
+ ' useful information (length validation, gene merge' \
18
16
  ' validation, sequence duplication checking, ORF finding)' \
19
17
  ' based on the similarities to genes in public databases.'
18
+ s.required_ruby_version = '>= 2.2.0'
20
19
 
21
- s.required_ruby_version = '>= 2.0.0'
22
- s.add_development_dependency 'bundler', '~> 1.6'
23
- s.add_development_dependency 'rake', '~>10.3'
24
- s.add_development_dependency 'yard', '~> 0.8'
25
- s.add_development_dependency 'codeclimate-test-reporter', '~> 0.4', '>= 0.4.7'
26
- s.add_development_dependency('minitest', '~> 5.4')
27
- s.add_dependency('bio', '~> 1.4')
28
- s.add_dependency('bio-blastxmlparser', '~>2.0')
29
- s.add_dependency('statsample', '2.0.1')
20
+ s.add_development_dependency 'minitest', '~> 5.10'
21
+ s.add_development_dependency 'rake', '~> 12.3'
22
+ s.add_development_dependency 'yard', '~> 0.9.11'
23
+
24
+ s.add_dependency 'bio', '~> 1.4'
25
+ s.add_dependency 'bio-blastxmlparser', '~> 2.0'
26
+ s.add_dependency 'genevalidatorapp', '~> 2.1.3'
27
+ s.add_dependency 'rack', '~> 2.0'
28
+ s.add_dependency 'slim', '~>3.0'
29
+ s.add_dependency 'statsample', '2.1.0'
30
+
31
+ # Adding mechanize gem just to silence a message on load.
32
+ # This is due to the Statsample gem
33
+ # See https://github.com/SciRuby/daru/issues/404
34
+ # See https://github.com/SciRuby/statsample/pull/69
35
+ s.add_dependency 'mechanize', '2.7.5'
30
36
 
31
37
  s.files = `git ls-files -z`.split("\x0")
32
38
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
@@ -35,15 +41,24 @@ Gem::Specification.new do |s|
35
41
 
36
42
  s.post_install_message = <<INFO
37
43
 
38
- ------------------------------------------------------------------------
39
- Thank you for validating your gene predictions with GeneValidator!
44
+ ----------------------------------------------------------------------------
45
+ Thank you for validating your gene predictions with GeneValidator!
46
+
47
+ ==> To launch GeneValidator execute 'genevalidator' from command line.
48
+
49
+ genevalidator [OPTIONAL ARGUMENTS] INPUT_FILE
50
+
51
+ See 'genevalidator --help' for more information
52
+
53
+ ==> To launch GeneValidator as a web application execute 'genevalidator' from command line.
54
+
55
+ genevalidator app [OPTIONAL ARGUMENTS]
40
56
 
41
- To launch GeneValidator execute 'genevalidator' from command line.
57
+ See 'genevalidator app --help' for more information
42
58
 
43
- $ genevalidator [options] FASTA_FILE
59
+ ==> Visit https://wurmlab.github.io/tools/genevalidator/ for more information.
44
60
 
45
- Visit https://github.com/wurmlab/GeneValidator for more information.
46
- ------------------------------------------------------------------------
61
+ ----------------------------------------------------------------------------
47
62
 
48
63
  INFO
49
64
  end
@@ -0,0 +1,92 @@
1
+ #!/bin/sh
2
+
3
+ ## USAGE: bash install.sh $INSTALL_DIR
4
+ ## $ bash install.sh $INSTALL_DIR
5
+
6
+ set -eu
7
+
8
+ # OS detection
9
+ KERNEL="$(uname -s | tr '[:upper:]' '[:lower:]')"
10
+
11
+ if [ "$KERNEL" = "darwin" ]; then
12
+ PLATFORM='osx'
13
+ elif [ "$KERNEL" = "linux" ]; then
14
+ ARCH=$(uname -m)
15
+ if [ "$ARCH" = "x86_64" ]; then
16
+ PLATFORM='linux-x86_64'
17
+ else
18
+ PLATFORM='linux-x86'
19
+ fi
20
+ fi
21
+
22
+ # If there is an argument, then that is where GV will be installed
23
+ if [ "$0" = 'sh' ]; then
24
+ # I.e. when piping from curl
25
+ INSTALL_DIR=$PWD/genevalidator
26
+ elif [ "$0" = 'install.sh' ]; then
27
+ # I.e. when running directly
28
+ INSTALL_DIR=$PWD/genevalidator
29
+ else
30
+ INSTALL_DIR="$0"
31
+ fi
32
+
33
+ GV_URL=$(curl -s https://api.github.com/repos/wurmlab/genevalidator/releases/latest \
34
+ | grep browser_download_url \
35
+ | grep -i $PLATFORM \
36
+ | cut -d '"' -f 4)
37
+
38
+ echo >&2 "==> Installing GeneValidator to:"
39
+ echo >&2 " ${INSTALL_DIR}"
40
+ echo >&2
41
+
42
+ mkdir "${INSTALL_DIR}"
43
+ curl -SL "$GV_URL" | tar zxf - -C "${INSTALL_DIR}" --strip-components 1
44
+
45
+ echo >&2
46
+ echo >&2 "==> GeneValidator successfully installed."
47
+
48
+ ### Check which SHELL and then test different profile files
49
+ case $SHELL in
50
+ */zsh)
51
+ # assume Zsh
52
+ if test -e "${HOME}/.zshrc"; then
53
+ DOT_FILE=${HOME}/.zshrc
54
+ elif test -e "${HOME}/.zprofile"; then
55
+ DOT_FILE=${HOME}/.zprofile
56
+ elif test -e "${HOME}/.profile"; then
57
+ DOT_FILE=${HOME}/.profile
58
+ fi
59
+ ;;
60
+ */bash)
61
+ # assume Bash
62
+ if test -e "${HOME}/.bashrc"; then
63
+ DOT_FILE=${HOME}/.bashrc
64
+ elif test -e "${HOME}/.bash_profile"; then
65
+ DOT_FILE=${HOME}/.bash_profile
66
+ elif test -e "${HOME}/.profile"; then
67
+ DOT_FILE=${HOME}/.profile
68
+ fi
69
+ ;;
70
+ *)
71
+ if test -e "${HOME}/.profile"; then
72
+ DOT_FILE=${HOME}/.profile
73
+ fi
74
+ esac
75
+
76
+
77
+ if [ -z ${DOT_FILE+x} ]; then
78
+ # DOT File hasn't been set.
79
+ echo >&2
80
+ echo >&2 '==> No profile files were found.'
81
+ echo >&2 ' Please create one and add the following line to that file:'
82
+ echo >&2
83
+ echo >&2 ' export PATH="'"${INSTALL_DIR}"'/bin:${PATH}"'
84
+ else
85
+ echo >&2 'export PATH="'"${INSTALL_DIR}"'/bin:${PATH}"' >> "${DOT_FILE}"
86
+ echo >&2
87
+ echo >&2 "==> Added GeneValidator to your PATH in ${DOT_FILE}"
88
+ echo >&2
89
+ echo >&2 "==> Run \`genevalidator -h\` in a new window to get started."
90
+ fi
91
+
92
+ echo >&2
@@ -5,61 +5,36 @@ require 'genevalidator/arg_validation'
5
5
  require 'genevalidator/blast'
6
6
  require 'genevalidator/exceptions'
7
7
  require 'genevalidator/get_raw_sequences'
8
+ require 'genevalidator/json_to_gv_results'
8
9
  require 'genevalidator/output'
10
+ require 'genevalidator/output_files'
9
11
  require 'genevalidator/tabular_parser'
10
12
  require 'genevalidator/validation'
11
13
 
12
14
  # Top level module / namespace.
13
15
  module GeneValidator
14
16
  class << self
15
- attr_accessor :opt, :config, :overview
17
+ attr_accessor :opt, :config, :overview, :dirs
16
18
  attr_reader :raw_seq_file_index
17
19
  attr_reader :raw_seq_file_load
18
20
  # array of indexes for the start offsets of each query in the fasta file
19
21
  attr_reader :query_idx
20
- attr_accessor :mutex, :mutex_html, :mutex_json, :mutex_array
22
+ attr_accessor :mutex, :mutex_array
21
23
 
22
- def init(opt, start_idx = 1, summary = true)
23
- $stderr.puts 'Analysing input arguments'
24
+ def init(opt, start_idx = 1)
25
+ warn '==> Analysing input arguments'
24
26
  @opt = opt
25
27
  GVArgValidation.validate_args # validates @opt
28
+ number_of_sequences = index_the_input
26
29
 
27
- @config = {
28
- idx: 0,
29
- start_idx: start_idx,
30
- summary: summary,
31
-
32
- type: BlastUtils.guess_sequence_type_from_input_file,
33
- filename: File.basename(@opt[:input_fasta_file]),
34
- html_path: "#{@opt[:input_fasta_file]}.html",
35
- json_file: File.join(File.dirname(@opt[:input_fasta_file]),
36
- "#{File.basename(@opt[:input_fasta_file])}.json"),
37
- plot_dir: "#{@opt[:input_fasta_file]}.html/files/json",
38
- aux: File.expand_path(File.join(File.dirname(__FILE__), '../aux')),
39
-
40
- json_output: [],
41
- run_no: 0,
42
- output_max: 2500 # max no. of queries in the output file
43
- }
44
-
45
- @overview = {
46
- no_queries: 0,
47
- scores: [],
48
- good_scores: 0,
49
- bad_scores: 0,
50
- nee: 0,
51
- no_mafft: 0,
52
- no_internet: 0,
53
- map_errors: Hash.new(0),
54
- run_time: Hash.new(Pair1.new(0, 0))
55
- }
30
+ @config = setup_config(start_idx, number_of_sequences)
31
+ @dirs = setup_dirnames(@opt[:input_fasta_file])
56
32
 
57
33
  @mutex = Mutex.new
58
34
  @mutex_array = Mutex.new
59
- @mutex_html = Mutex.new
60
- @mutex_json = Mutex.new
61
- create_output_folder
62
- index_the_input
35
+
36
+ resume_from_previous_run(opt[:resumable]) unless opt[:resumable].nil?
37
+
63
38
  RawSequences.index_raw_seq_file if @opt[:raw_sequences]
64
39
  end
65
40
 
@@ -69,6 +44,8 @@ module GeneValidator
69
44
  # Run BLAST on all sequences (generates @opt[:blast_xml_file])
70
45
  # if no BLAST OUTPUT file provided...
71
46
  unless @opt[:blast_xml_file] || @opt[:blast_tabular_file]
47
+ blast_xml_fname = "#{dirs[:filename]}.blast_xml"
48
+ opt[:blast_xml_file] = File.join(dirs[:tmp_dir], blast_xml_fname)
72
49
  BlastUtils.run_blast_on_input_file
73
50
  end
74
51
  # Obtain fasta file of all BLAST hits if running align or dup validations
@@ -78,19 +55,110 @@ module GeneValidator
78
55
  end
79
56
  # Run Validations
80
57
  iterator = parse_blast_output_file
81
- (Validations.new).run_validations(iterator)
58
+ Validations.new.run_validations(iterator)
59
+ produce_output
60
+ print_directories_locations
61
+ end
62
+
63
+ ##
64
+ # Params:
65
+ # +output+: filename or stream, according to the type
66
+ # +type+: file or stream
67
+ # Returns an iterator.
68
+ def parse_blast_output_file
69
+ if @opt[:blast_xml_file]
70
+ Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
71
+ else
72
+ TabularParser.new
73
+ end
74
+ ## TODO: Add a Rescue statement - e.g. if unable to create the Object...
75
+ end
76
+
77
+ # Also called by json_to_gv script
78
+ def setup_dirnames(input_file)
79
+ fname = File.basename(input_file, File.extname(input_file))
80
+ out_dir = setup_output_dir(fname)
81
+ { filename: fname,
82
+ output_dir: out_dir,
83
+ tmp_dir: File.join(out_dir, 'tmp'),
84
+ json_dir: File.join(out_dir, 'tmp/json'),
85
+ html_file: File.join(out_dir, "#{fname}_results*.html"),
86
+ json_file: File.join(out_dir, "#{fname}_results.json"),
87
+ csv_file: File.join(out_dir, "#{fname}_results.csv"),
88
+ summary_file: File.join(out_dir, "#{fname}_summary.csv"),
89
+ fasta_file: File.join(out_dir, "#{fname}_results.fa"),
90
+ aux_dir: File.expand_path('../aux', __dir__) }
91
+ end
92
+
93
+ def extract_input_fasta_sequence(index)
94
+ start_offset = @query_idx[index + 1] - @query_idx[index]
95
+ end_offset = @query_idx[index]
96
+ IO.binread(@opt[:input_fasta_file], start_offset, end_offset)
97
+ end
98
+
99
+ def produce_output
100
+ @overview = Output.generate_overview(@config[:json_output],
101
+ @opt[:min_blast_hits])
102
+ eval_text = Output.generate_evaluation_text(@overview)
103
+ Output.print_console_footer(eval_text, @opt)
104
+
105
+ output_files = OutputFiles.new
106
+ output_files.write_json
107
+ output_files.write_html(eval_text)
108
+ output_files.write_csv
109
+ output_files.write_summary
110
+ output_files.print_best_fasta
111
+ end
112
+
113
+ private
114
+
115
+ def setup_config(start_idx, seq_length)
116
+ {
117
+ idx: 0,
118
+ start_idx: start_idx,
119
+
120
+ type: BlastUtils.guess_sequence_type_from_input_file,
82
121
 
83
- Output.write_json_file(@config[:json_output], @config[:json_file])
84
- Output.print_footer(@overview, @config)
122
+ json_output: Array.new(seq_length),
123
+ run_no: 0,
124
+ output_max: 2500 # max no. of queries in the output html file
125
+ }
85
126
  end
86
127
 
87
128
  ##
88
129
  # Creates the output folder and copies the auxiliary folders to this folder
89
- def create_output_folder(output_dir = @config[:html_path],
90
- aux_dir = @config[:aux])
130
+ def setup_output_dir(fname)
131
+ dir_name = "#{fname}_" + Time.now.strftime('%Y_%m_%d_%H_%M_%S')
132
+ default_outdir = File.join(Dir.pwd, dir_name)
133
+ output_dir = @opt[:output_dir].nil? ? default_outdir : @opt[:output_dir]
134
+ assert_output_dir_does_not_exist(output_dir)
91
135
  Dir.mkdir(output_dir)
92
- aux_files = File.join(aux_dir, 'files/')
93
- FileUtils.cp_r(aux_files, output_dir)
136
+ Dir.mkdir(File.join(output_dir, 'tmp'))
137
+ cp_html_files(output_dir)
138
+ output_dir
139
+ end
140
+
141
+ def assert_output_dir_does_not_exist(output_dir)
142
+ return unless Dir.exist?(output_dir)
143
+ FileUtils.rm_r(output_dir) if @opt[:force_rewrite]
144
+ return if @opt[:force_rewrite]
145
+ warn "The output directory (#{output_dir}) already exists."
146
+ warn ''
147
+ warn 'Please remove this directory before continuing.'
148
+ warn 'Alternatively, you rerun GeneValidator with the `--force` argument,'
149
+ warn 'which rewrites over any previous output.'
150
+ exit 1
151
+ end
152
+
153
+ def cp_html_files(output_dir)
154
+ if @opt[:output_formats].include? 'html'
155
+ aux_files = File.expand_path('../aux/html_files/', __dir__)
156
+ FileUtils.cp_r(aux_files, output_dir)
157
+ FileUtils.ln_s(File.join('..', 'html_files', 'json'),
158
+ File.join(output_dir, 'tmp', 'json'))
159
+ else
160
+ Dir.mkdir(File.join(output_dir, 'tmp', 'json'))
161
+ end
94
162
  end
95
163
 
96
164
  ##
@@ -99,22 +167,69 @@ module GeneValidator
99
167
  # start and end positions of each query.
100
168
  def index_the_input
101
169
  fasta_content = IO.binread(@opt[:input_fasta_file])
102
- @query_idx = fasta_content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }
170
+ @query_idx = fasta_content.enum_for(:scan, /(>[^>]+)/).map do
171
+ Regexp.last_match.begin(0)
172
+ end
103
173
  @query_idx.push(fasta_content.length)
174
+ @query_idx.length - 1
104
175
  end
105
176
 
106
- ##
107
- # Params:
108
- # +output+: filename or stream, according to the type
109
- # +type+: file or stream
110
- # Returns an iterator..
111
- def parse_blast_output_file
112
- if @opt[:blast_xml_file]
113
- Bio::BlastXMLParser::XmlIterator.new(@opt[:blast_xml_file]).to_enum
114
- else
115
- TabularParser.new
177
+ def print_directories_locations
178
+ warn '==> GeneValidator output files have been saved to:'
179
+ warn " #{File.expand_path(@dirs[:output_dir])}"
180
+ end
181
+
182
+ def resume_from_previous_run(prev_dir)
183
+ prev_tmp_dir = File.join(prev_dir, 'tmp')
184
+ return unless Dir.exist? prev_tmp_dir
185
+ copy_blast_xml_files(prev_tmp_dir)
186
+ copy_raw_seq_files(prev_tmp_dir)
187
+ copy_prev_json_output(prev_tmp_dir)
188
+ end
189
+
190
+ def copy_blast_xml_files(prev_tmp_dir)
191
+ return if @opt[:blast_xml_file] || @opt[:blast_tabular_file]
192
+ prev_blast_xml = Dir[File.join(prev_tmp_dir, '*blast_xml')]
193
+ return if prev_blast_xml.empty?
194
+ blast_xml_fname = "#{@dirs[:filename]}.blast_xml"
195
+ @opt[:blast_xml_file] = File.join(@dirs[:tmp_dir], blast_xml_fname)
196
+ FileUtils.cp(prev_blast_xml[0], @opt[:blast_xml_file])
197
+ end
198
+
199
+ def copy_raw_seq_files(prev_tmp_dir)
200
+ return if @opt[:raw_sequences]
201
+ return unless @opt[:validations].include?('align') ||
202
+ @opt[:validations].include?('dup')
203
+ prev_raw_seq = Dir[File.join(prev_tmp_dir, '*raw_seq')]
204
+ return if prev_raw_seq.empty?
205
+ raw_seq_fname = "#{@dirs[:filename]}.blast_xml.raw_seq"
206
+ @opt[:raw_sequences] = File.join(@dirs[:tmp_dir], raw_seq_fname)
207
+ FileUtils.cp(prev_raw_seq[0], @opt[:raw_sequences])
208
+ end
209
+
210
+ def copy_prev_json_output(prev_tmp_dir)
211
+ prev_json_dir = File.join(prev_tmp_dir, 'json')
212
+ return unless Dir.exist? prev_json_dir
213
+ all_jsons = Dir[File.join(prev_json_dir, '*.json')]
214
+ FileUtils.cp(all_jsons, @dirs[:json_dir])
215
+ overview_json = Dir[File.join(prev_json_dir, 'overview.json')]
216
+ data_jsons = all_jsons - overview_json
217
+ parse_prev_json(data_jsons)
218
+ end
219
+
220
+ def parse_prev_json(data_jsons)
221
+ data_jsons.each do |json|
222
+ json_contents = File.read(File.expand_path(json))
223
+ data = JSON.parse(json_contents, symbolize_names: true)
224
+ idx = json.match(/(\d+).json/)[1].to_i - 1
225
+ @config[:json_output][idx] = data
226
+ print_prev_json_to_console(data)
116
227
  end
117
- ## TODO: Add a Rescue statement - e.g. if unable to create the Object...
228
+ end
229
+
230
+ def print_prev_json_to_console(data)
231
+ JsonToGVResults.print_console_header(data)
232
+ JsonToGVResults.print_output_console(data)
118
233
  end
119
234
  end
120
235
  end