genevalidatorapp 1.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +24 -0
  3. data/.travis.yml +7 -0
  4. data/Gemfile +4 -0
  5. data/GeneValidatorApp.gemspec +50 -0
  6. data/LICENSE.txt +661 -0
  7. data/README.md +101 -0
  8. data/Rakefile +14 -0
  9. data/bin/genevalidatorapp +122 -0
  10. data/config.ru +3 -0
  11. data/lib/GeneValidatorApp.rb +321 -0
  12. data/lib/GeneValidatorApp/config.rb +86 -0
  13. data/lib/GeneValidatorApp/database.rb +114 -0
  14. data/lib/GeneValidatorApp/genevalidator.rb +241 -0
  15. data/lib/GeneValidatorApp/logger.rb +24 -0
  16. data/lib/GeneValidatorApp/version.rb +3 -0
  17. data/public/GeneValidator/.gitkeep +0 -0
  18. data/public/web_files/css/bootstrap.min.css +7 -0
  19. data/public/web_files/css/bootstrap1.min.css +7 -0
  20. data/public/web_files/css/custom.css +521 -0
  21. data/public/web_files/css/custom.min.css +3 -0
  22. data/public/web_files/css/font-awesome.min.css +4 -0
  23. data/public/web_files/fonts/FontAwesome.otf +0 -0
  24. data/public/web_files/fonts/fontawesome-webfont.eot +0 -0
  25. data/public/web_files/fonts/fontawesome-webfont.svg +504 -0
  26. data/public/web_files/fonts/fontawesome-webfont.ttf +0 -0
  27. data/public/web_files/fonts/fontawesome-webfont.woff +0 -0
  28. data/public/web_files/img/gene.png +0 -0
  29. data/public/web_files/js/bionode-seq.min.js +1 -0
  30. data/public/web_files/js/bootstrap.min.js +6 -0
  31. data/public/web_files/js/d3.v3.min.js +5 -0
  32. data/public/web_files/js/genevalidator.js +282 -0
  33. data/public/web_files/js/genevalidator.min.js +1 -0
  34. data/public/web_files/js/jquery.cookie.min.js +1 -0
  35. data/public/web_files/js/jquery.min.js +4 -0
  36. data/public/web_files/js/jquery.tablesorter.min.js +5 -0
  37. data/public/web_files/js/jquery.validate.min.js +4 -0
  38. data/public/web_files/js/plots.js +744 -0
  39. data/public/web_files/js/plots.min.js +1 -0
  40. data/spec/app_spec.rb +107 -0
  41. data/spec/database/funky_ids/funky_ids.fa +10 -0
  42. data/spec/database/funky_ids/funky_ids.fa.nhr +0 -0
  43. data/spec/database/funky_ids/funky_ids.fa.nin +0 -0
  44. data/spec/database/funky_ids/funky_ids.fa.nog +0 -0
  45. data/spec/database/funky_ids/funky_ids.fa.nsd +9 -0
  46. data/spec/database/funky_ids/funky_ids.fa.nsi +0 -0
  47. data/spec/database/funky_ids/funky_ids.fa.nsq +0 -0
  48. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta +6449 -0
  49. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  50. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  51. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pog +0 -0
  52. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psd +2378 -0
  53. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psi +0 -0
  54. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
  55. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
  56. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  57. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  58. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nog +0 -0
  59. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsd +946 -0
  60. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsi +0 -0
  61. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  62. data/spec/database/unformatted/Cardiocondyla_obscurior/Cobs1.4.proteins.fa +148303 -0
  63. data/spec/database/without_parse_seqids/without_parse_seqids.fa +10 -0
  64. data/spec/database/without_parse_seqids/without_parse_seqids.fa.phr +0 -0
  65. data/spec/database/without_parse_seqids/without_parse_seqids.fa.pin +0 -0
  66. data/spec/database/without_parse_seqids/without_parse_seqids.fa.psq +0 -0
  67. data/spec/database_spec.rb +37 -0
  68. data/spec/empty_config.yml +0 -0
  69. data/views/500.slim +5 -0
  70. data/views/index.slim +66 -0
  71. data/views/layout.slim +85 -0
  72. metadata +337 -0
@@ -0,0 +1,86 @@
1
+ require 'forwardable'
2
+
3
+ # Define Config class.
4
module GeneValidatorApp
  # Captures the application's configuration.
  #
  # Settings come from three sources, listed from lowest to highest priority:
  # the built-in defaults, the YAML configuration file, and the data handed to
  # the constructor.
  class Config
    extend Forwardable

    def_delegators GeneValidatorApp, :logger

    # data - Hash of settings (String or Symbol keys; keys are symbolised).
    #        An optional :config_file entry names the YAML file to read; it
    #        defaults to ~/.genevalidatorapp.conf and is expanded to an
    #        absolute path.
    #
    # Raises CONFIG_FILE_ERROR if the configuration file cannot be parsed.
    def initialize(data = {})
      @data = symbolise data
      @config_file = @data.delete(:config_file) || default_config_file
      @config_file = File.expand_path(@config_file)
      @data = parse_config_file.update @data
      @data = defaults.update @data
    end

    attr_reader :data, :config_file

    # Get.
    def [](key)
      data[key]
    end

    # Set.
    def []=(key, value)
      data[key] = value
    end

    # Exists?
    def include?(key)
      data.include? key
    end

    # Writes the configuration to the config file as YAML, leaving out entries
    # whose value is nil. The in-memory configuration is left untouched.
    def write_config_file
      return unless config_file
      File.open(config_file, 'w') do |f|
        # `reject` rather than `delete_if`: saving must not drop keys from the
        # live configuration.
        f.puts(data.reject { |_, v| v.nil? }.to_yaml)
      end
    end

    private

    # Returns a copy of `data` with its keys converted to Symbols. Returns {}
    # when `data` is nil or false (e.g. an empty YAML file).
    def symbolise(data)
      return {} unless data
      Hash[data.map { |k, v| [k.to_sym, v] }]
    end

    # Parses and returns data from config_file if it exists. Returns {}
    # otherwise.
    #
    # Any error raised while reading or parsing is wrapped in
    # CONFIG_FILE_ERROR.
    def parse_config_file
      unless file? config_file
        logger.debug "Configuration file not found: #{config_file}"
        return {}
      end

      logger.debug "Reading configuration file: #{config_file}."
      # NOTE(review): YAML.load_file is only appropriate while the config file
      # is trusted (written by the admin running the app).
      symbolise YAML.load_file(config_file)
    rescue => error
      raise CONFIG_FILE_ERROR.new(config_file, error)
    end

    # True if `file` is given and names an existing regular file.
    def file?(file)
      file && File.exist?(file) && File.file?(file)
    end

    # Default configuration data.
    def defaults
      {
        :num_threads => 1,
        :port        => 4567,
        :host        => '0.0.0.0',
        :web_dir     => Dir.pwd
      }
    end

    # Location of the config file when none is given explicitly.
    def default_config_file
      '~/.genevalidatorapp.conf'
    end
  end
end
@@ -0,0 +1,114 @@
1
+ require 'find'
2
+ require 'digest/md5'
3
+ require 'forwardable'
4
+
5
module GeneValidatorApp
  # Captures a directory containing FASTA files and BLAST databases.
  #
  # It is important that formatted BLAST database files have the same dirname
  # and basename as the source FASTA for GeneValidatorApp to be able to tell
  # formatted FASTA from unformatted. And that FASTA files be formatted with
  # `parse_seqids` option of `makeblastdb` for sequence retrieval to work.
  #
  # GeneValidatorApp will always place BLAST database files alongside input
  # FASTA, and use `parse_seqids` option of `makeblastdb` to format databases.
  #
  # Each instance is a (name, title, type) record; `id` is the MD5 hex digest
  # of `name`. The class itself acts as the registry of known databases.
  class Database < Struct.new(:name, :title, :type)
    class << self
      extend Forwardable

      def_delegators GeneValidatorApp, :config, :logger

      # Registry of databases, keyed by database id.
      def collection
        @collection ||= {}
      end

      private :collection

      # Registers `database` under its id (replacing any entry with that id).
      def <<(database)
        collection[database.id] = database
      end

      # Returns the databases for the given id or Array of ids, in order;
      # unknown ids yield nil entries.
      def [](ids)
        ids = Array ids
        collection.values_at(*ids)
      end

      def ids
        collection.keys
      end

      def all
        collection.values
      end

      def each(&block)
        all.each(&block)
      end

      # True if a database with the given path (its `name`) is registered.
      def include?(path)
        collection.include? Digest::MD5.hexdigest path
      end

      def group_by(&block)
        all.group_by(&block)
      end

      def first
        all.first
      end

      # The database named by config[:default_db] when it is registered,
      # otherwise the first registered database (nil if there are none).
      def default_db
        if config[:default_db] && Database.include?(config[:default_db])
          all.find { |a| a.name == config[:default_db] }
        else
          all.first
        end
      end

      # Every registered database except the default one.
      def non_default_dbs
        default = default_db # resolve once instead of once per element
        all.reject { |db| db == default }
      end

      # Returns the registered databases (an Array) whose title equals
      # `db_title`.
      def obtain_original_structure(db_title)
        all.select { |db| db.title.to_s.chomp == db_title }
      end

      # Recursively scans config[:database_dir] for BLAST databases and
      # registers every protein database found. Returns the raw listing.
      def scan_databases_dir
        database_dir = config[:database_dir]
        list = `blastdbcmd -recursive -list #{database_dir} -list_outfmt "%p %f %t" 2>&1`
        list.each_line do |line|
          # Limit the split to three fields so that a title containing spaces
          # is kept whole rather than cut at its first word.
          type, name, title = line.strip.split(' ', 3)
          # stderr is merged into the listing; ignore blank or short lines.
          next unless type && name
          next if multipart_database_name?(name)
          next unless type.downcase == 'protein' # to ensure we only have protein dbs
          self << Database.new(name, title, type)
        end
      end

      # Returns true if the database name appears to be a multi-part database
      # name, i.e. it ends in a dot followed by a two or three digit volume
      # number.
      #
      # e.g.
      # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
      # /home/ben/pd.ben/sequenceserver/db/nr => no
      # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
      # /home/ben/pd.ben/sequenceserver/db/release15 => no
      def multipart_database_name?(db_name)
        !(db_name =~ %r{.+/\S+\.\d{2,3}$}).nil?
      end
    end

    # Accepts the Struct members (name, title, type). The last argument is
    # stored downcased and all members are stored frozen. Copies are used so
    # that the caller's own strings are neither modified nor frozen.
    def initialize(*args)
      fields = args.map(&:dup)
      fields.last.downcase!
      fields.each(&:freeze)
      super(*fields)

      @id = Digest::MD5.hexdigest fields.first
    end

    attr_reader :id

    def to_s
      "#{type}: #{title} #{name}"
    end
  end
end
@@ -0,0 +1,241 @@
1
+ require 'forwardable'
2
+ require 'bio'
3
+ require 'fileutils'
4
+ require 'genevalidator'
5
+
6
module GeneValidatorApp
  # Module that runs GeneValidator on sequences submitted to the app.
  #
  # All state (run id, directories, params, result URL) is kept in instance
  # variables of the module itself, so values from one call remain in place
  # until a later call overwrites them.
  module RunGeneValidator
    # To signal error in query sequence or options.
    class ArgumentError < ArgumentError
    end

    # To signal internal errors.
    #
    # RuntimeError is raised when there is a problem in writing the input file
    # or in running genevalidator. These are rare, infrastructure errors, used
    # internally, and of concern only to the admins/developers.
    class RuntimeError < RuntimeError
    end

    class << self
      extend Forwardable

      def_delegators GeneValidatorApp, :config, :logger

      attr_reader :gv_dir, :tmp_gv_dir, :input_file, :xml_file, :raw_seq,
                  :unique_id, :params

      # Setting the scene: creates the run directory and its soft link,
      # validates `params`, resolves the database and builds the result URL.
      #
      # url    - URL of the request that submitted the sequences.
      # params - Hash read via :seq, :validations, :database and :result_link.
      #
      # Raises ArgumentError when a required param is missing or the sequence
      # is too long.
      def init(url, params)
        create_unique_id
        create_subdir_in_main_tmpdir
        create_soft_link_from_tmpdir_to_GV_dir
        @params = params
        validate_params
        obtain_db_path
        @url = produce_result_url_link(url)
      end

      # Writes the sequences to file and runs genevalidator on them.
      # Returns html for just the table or a link to the page produced by GV
      # (the latter when params[:result_link] is set).
      def run
        write_seq_to_file
        run_genevalidator
        (@params[:result_link]) ? @url : produce_table_html
      end

      private

      # Creates a unique run ID (based on time) and the matching temp dir
      # path. Returns the run ID.
      def create_unique_id
        assign_unique_id
        ensure_unique_id
        @unique_id
      end

      # Ensures that the unique id is unique (if a sub dir is present in the
      # temp dir with the unique id, it simply creates a new one).
      def ensure_unique_id
        assign_unique_id while File.exist?(@gv_tmpdir)
        logger.debug("Unique ID = #{@unique_id}")
      end

      # Sets @unique_id from the current time and derives @gv_tmpdir from it.
      def assign_unique_id
        @unique_id = Time.new.strftime('%Y-%m-%d_%H-%M-%S_%L-%N')
        @gv_tmpdir = GeneValidatorApp.tempdir + @unique_id
      end

      # Create a sub_dir in the Tempdir (name is based on unique id)
      def create_subdir_in_main_tmpdir
        logger.debug("GV Tempdir = #{@gv_tmpdir}")
        FileUtils.mkdir_p(@gv_tmpdir)
      end

      # Creates a soft link in the public GeneValidator dir that points to the
      # run's temp dir.
      def create_soft_link_from_tmpdir_to_GV_dir
        @gv_dir = GeneValidatorApp.public_dir + 'GeneValidator' + @unique_id
        logger.debug("Local GV dir = #{@gv_dir}")
        FileUtils.ln_s @gv_tmpdir.to_s, @gv_dir.to_s
      end

      # Validates the parameters provided via the app.
      # Only important if POST request is sent via API - Web APP also
      # validates all params via Javascript.
      def validate_params
        check_seq_param_present
        check_seq_length
        check_validations_param_present
        check_database_params_present
      end

      # Asserts that the seq param is present and not blank.
      def check_seq_param_present
        return unless @params[:seq].to_s.strip.empty?
        fail ArgumentError, 'No input sequence provided.'
      end

      # Rejects sequences whose length is not below config[:max_characters].
      # No limit applies when that setting is absent.
      def check_seq_length
        return unless config[:max_characters]
        return if @params[:seq].length < config[:max_characters]
        fail ArgumentError, 'The input sequence is too long.'
      end

      # Asserts whether the validations param are specified
      def check_validations_param_present
        fail ArgumentError, 'No validations specified' unless @params[:validations]
      end

      # Asserts whether the database parameter is present
      def check_database_params_present
        fail ArgumentError, 'No database specified' unless @params[:database]
      end

      # Stores in @db the path of the database whose title was requested (the
      # last one if several share the title). @db is reset to nil when nothing
      # matches, so a path from an earlier request is never reused.
      def obtain_db_path
        match = Database.obtain_original_structure(@params[:database]).last
        @db = match && match.name
      end

      # Writes the input sequences to a file with the sub_dir in the temp_dir
      def write_seq_to_file
        @input_fasta_file = @gv_tmpdir + 'input_file.fa'
        logger.debug("Writing input seqs to: '#{@input_fasta_file}'")
        ensure_unix_line_ending
        ensure_fasta_valid
        File.open(@input_fasta_file, 'w+') do |f|
          f.write(@params[:seq])
        end
        assert_input_file_present
      end

      # Converts \r\n and lone \r line endings to \n. Stores a new string
      # rather than editing the submitted one in place.
      def ensure_unix_line_ending
        @params[:seq] = @params[:seq].gsub(/\r\n?/, "\n")
      end

      # Adds an ID (based on the time when submitted) to input that does not
      # start with a fasta header, and makes repeated sequence ids unique by
      # appending _1, _2, ... to the second and later occurrences.
      def ensure_fasta_valid
        logger.debug('Adding an ID to sequences that are not in fasta format.')
        unique_queries = {}
        sequence = @params[:seq].lstrip
        if sequence[0] != '>'
          sequence.insert(0, ">Submitted:#{Time.now.strftime('%H:%M-%B_%d_%Y')}\n")
        end
        sequence.gsub!(/^\>(\S+)/) do |s|
          if unique_queries.key?(s)
            unique_queries[s] += 1
            s + '_' + (unique_queries[s] - 1).to_s
          else
            unique_queries[s] = 1
            s
          end
        end
        @params[:seq] = sequence
      end

      # Asserts that the input file has been generated and is not empty.
      def assert_input_file_present
        return if File.exist?(@input_fasta_file) && !File.zero?(@input_fasta_file)
        fail RuntimeError, 'GeneValidatorApp was unable to create the input' \
                           ' file.'
      end

      # Returns 'blastp' if sequence contains amino acids or returns 'blastx'
      # if it contains nucleic acids.
      def get_blast_type(sequences)
        (check_seq_type(sequences) == Bio::Sequence::AA) ? 'blastp' : 'blastx'
      end

      def check_seq_type(sequences)
        Bio::Sequence.new(Bio::FastaFormat.new(sequences).seq).guess(0.9)
      end

      # Runs GeneValidator and checks that it produced its table file.
      #
      # Raises RuntimeError if GeneValidator exits or the output is missing.
      def run_genevalidator
        opts = set_up_gv_opts
        logger.debug("Running GeneValidator with options: #{opts}")
        create_gv_log_file
        run_gv(opts)
        assert_table_output_file_produced
      rescue SystemExit
        raise RuntimeError, 'GeneValidator failed to run properly'
      end

      # Runs the validation. Unless the logger is in debug mode, standard
      # output is redirected to the run's log file for the duration of the run
      # and restored afterwards, even if the run raises.
      #
      # opts - options Hash handed to GeneValidator::Validation.
      def run_gv(opts = set_up_gv_opts)
        validation = GeneValidator::Validation.new(opts, 1, true, true)
        return validation.run if logger.debug?

        saved_stdout = $stdout.clone
        begin
          $stdout.reopen(@gv_log_file, 'w')
          validation.run
        ensure
          # Flush what was captured, then point stdout back at its original
          # destination and release the spare descriptor.
          $stdout.flush
          $stdout.reopen(saved_stdout)
          saved_stdout.close
        end
      end

      # Options handed to GeneValidator for this run.
      def set_up_gv_opts
        {
          validations: @params[:validations],
          db: @db,
          num_threads: config[:num_threads],
          fast: true,
          input_fasta_file: @input_fasta_file.to_s
        }
      end

      # Sets the path of the log file that captures GeneValidator's output.
      def create_gv_log_file
        @gv_log_file = (@gv_tmpdir + 'log_file.txt').to_s
        logger.debug("Log file: #{@gv_log_file}")
      end

      # Asserts whether the results file is produced by GeneValidator.
      def assert_table_output_file_produced
        @table_file = @gv_dir + 'input_file.fa.html/files/table.html'
        return if File.exist?(@table_file)
        fail RuntimeError, 'GeneValidator did not produce the required' \
                           ' output file.'
      end

      # Reads the GV output table file.
      # Updates links to the plots with relative links to plot jsons.
      def produce_table_html
        orig_plots_dir  = 'files/json/input_file.fa_'
        local_plots_dir = Pathname.new('GeneValidator') + @unique_id +
                          'input_file.fa.html/files/json/input_file.fa_'
        full_html = IO.binread(@table_file)
        # Plain-string substitution: the path is literal text, not a pattern.
        full_html.gsub(orig_plots_dir, local_plots_dir.to_s).gsub(
          '#Place_external_results_link_here', @url)
      end

      # Returns the URL of the results page: `url` without its trailing
      # "/input" segment and trailing slashes, followed by the path of this
      # run's results page. Only the final path segment is removed, so
      # "input" occurring elsewhere in the URL is preserved.
      def produce_result_url_link(url)
        url.sub(%r{/input/*\z}, '').sub(%r{/*\z}, '') +
          "/GeneValidator/#{@unique_id}/input_file.fa.html/results.html"
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'logger'
2
+
3
module GeneValidatorApp
  # Application logger: a standard Logger that logs at DEBUG level when
  # verbose and at INFO level otherwise, using the Formatter below.
  class Logger < Logger
    # Formats log lines as "[timestamp] SEVERITY message" so that they are
    # consistent with Sinatra's.
    class Formatter < Formatter
      Format = "[%s] %s %s\n"

      def initialize
        self.datetime_format = '%Y-%m-%d %H:%M:%S'
      end

      # Builds the log line; the program name is ignored.
      def call(severity, time, _progname, msg)
        timestamp = format_datetime(time)
        text = msg2str(msg)
        Kernel.format(Format, timestamp, severity, text)
      end
    end

    # dev     - log device handed to the standard Logger.
    # verbose - when true, DEBUG messages are logged as well.
    def initialize(dev, verbose = false)
      super(dev)
      if verbose
        self.level = DEBUG
      else
        self.level = INFO
      end
      self.formatter = Formatter.new
    end
  end
end