genevalidatorapp 1.4.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +24 -0
  3. data/.travis.yml +7 -0
  4. data/Gemfile +4 -0
  5. data/GeneValidatorApp.gemspec +50 -0
  6. data/LICENSE.txt +661 -0
  7. data/README.md +101 -0
  8. data/Rakefile +14 -0
  9. data/bin/genevalidatorapp +122 -0
  10. data/config.ru +3 -0
  11. data/lib/GeneValidatorApp.rb +321 -0
  12. data/lib/GeneValidatorApp/config.rb +86 -0
  13. data/lib/GeneValidatorApp/database.rb +114 -0
  14. data/lib/GeneValidatorApp/genevalidator.rb +241 -0
  15. data/lib/GeneValidatorApp/logger.rb +24 -0
  16. data/lib/GeneValidatorApp/version.rb +3 -0
  17. data/public/GeneValidator/.gitkeep +0 -0
  18. data/public/web_files/css/bootstrap.min.css +7 -0
  19. data/public/web_files/css/bootstrap1.min.css +7 -0
  20. data/public/web_files/css/custom.css +521 -0
  21. data/public/web_files/css/custom.min.css +3 -0
  22. data/public/web_files/css/font-awesome.min.css +4 -0
  23. data/public/web_files/fonts/FontAwesome.otf +0 -0
  24. data/public/web_files/fonts/fontawesome-webfont.eot +0 -0
  25. data/public/web_files/fonts/fontawesome-webfont.svg +504 -0
  26. data/public/web_files/fonts/fontawesome-webfont.ttf +0 -0
  27. data/public/web_files/fonts/fontawesome-webfont.woff +0 -0
  28. data/public/web_files/img/gene.png +0 -0
  29. data/public/web_files/js/bionode-seq.min.js +1 -0
  30. data/public/web_files/js/bootstrap.min.js +6 -0
  31. data/public/web_files/js/d3.v3.min.js +5 -0
  32. data/public/web_files/js/genevalidator.js +282 -0
  33. data/public/web_files/js/genevalidator.min.js +1 -0
  34. data/public/web_files/js/jquery.cookie.min.js +1 -0
  35. data/public/web_files/js/jquery.min.js +4 -0
  36. data/public/web_files/js/jquery.tablesorter.min.js +5 -0
  37. data/public/web_files/js/jquery.validate.min.js +4 -0
  38. data/public/web_files/js/plots.js +744 -0
  39. data/public/web_files/js/plots.min.js +1 -0
  40. data/spec/app_spec.rb +107 -0
  41. data/spec/database/funky_ids/funky_ids.fa +10 -0
  42. data/spec/database/funky_ids/funky_ids.fa.nhr +0 -0
  43. data/spec/database/funky_ids/funky_ids.fa.nin +0 -0
  44. data/spec/database/funky_ids/funky_ids.fa.nog +0 -0
  45. data/spec/database/funky_ids/funky_ids.fa.nsd +9 -0
  46. data/spec/database/funky_ids/funky_ids.fa.nsi +0 -0
  47. data/spec/database/funky_ids/funky_ids.fa.nsq +0 -0
  48. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta +6449 -0
  49. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
  50. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
  51. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.pog +0 -0
  52. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psd +2378 -0
  53. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psi +0 -0
  54. data/spec/database/sample/proteins/Solenopsis_invicta/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
  55. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta +5486 -0
  56. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
  57. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
  58. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nog +0 -0
  59. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsd +946 -0
  60. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsi +0 -0
  61. data/spec/database/sample/transcripts/Solenopsis_invicta/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
  62. data/spec/database/unformatted/Cardiocondyla_obscurior/Cobs1.4.proteins.fa +148303 -0
  63. data/spec/database/without_parse_seqids/without_parse_seqids.fa +10 -0
  64. data/spec/database/without_parse_seqids/without_parse_seqids.fa.phr +0 -0
  65. data/spec/database/without_parse_seqids/without_parse_seqids.fa.pin +0 -0
  66. data/spec/database/without_parse_seqids/without_parse_seqids.fa.psq +0 -0
  67. data/spec/database_spec.rb +37 -0
  68. data/spec/empty_config.yml +0 -0
  69. data/views/500.slim +5 -0
  70. data/views/index.slim +66 -0
  71. data/views/layout.slim +85 -0
  72. metadata +337 -0
@@ -0,0 +1,86 @@
1
+ require 'forwardable'
2
+
3
+ # Define Config class.
4
module GeneValidatorApp
  # Capture our configuration system.
  #
  # Settings come from three layers. From lowest to highest precedence:
  # the built-in defaults, the YAML configuration file, and the hash passed
  # to the constructor.
  class Config
    extend Forwardable

    def_delegators GeneValidatorApp, :logger

    # data - Hash of settings keyed by String or Symbol (nil is treated as an
    #        empty Hash). An optional :config_file entry names the YAML file
    #        to read; it is removed from the settings and falls back to
    #        `default_config_file`.
    #
    # Raises CONFIG_FILE_ERROR if the config file exists but cannot be read.
    def initialize(data = {})
      @data = symbolise data
      @config_file = @data.delete(:config_file) || default_config_file
      @config_file = File.expand_path(@config_file)
      @data = parse_config_file.update @data
      @data = defaults.update @data
    end

    attr_reader :data, :config_file

    # Get.
    def [](key)
      data[key]
    end

    # Set.
    def []=(key, value)
      data[key] = value
    end

    # Exists?
    def include?(key)
      data.include? key
    end

    # Write config data to config file. Entries whose value is nil are left
    # out of the file.
    #
    # The live settings are not modified: a pruned copy is serialised, so
    # `include?` answers the same before and after saving.
    def write_config_file
      return unless config_file
      persisted = data.reject { |_, value| value.nil? }
      File.open(config_file, 'w') do |f|
        f.puts(persisted.to_yaml)
      end
    end

    private

    # Returns a copy of `data` with every key converted to a Symbol.
    # Returns {} when `data` is nil or false (e.g. an empty YAML file).
    def symbolise(data)
      return {} unless data
      Hash[data.map { |k, v| [k.to_sym, v] }]
    end

    # Parses and returns data from config_file if it exists. Returns {}
    # otherwise.
    #
    # Any failure while reading or parsing is re-raised as CONFIG_FILE_ERROR
    # (defined elsewhere in the application).
    def parse_config_file
      unless file? config_file
        logger.debug "Configuration file not found: #{config_file}"
        return {}
      end

      logger.debug "Reading configuration file: #{config_file}."
      symbolise YAML.load_file(config_file)
    rescue => error
      raise CONFIG_FILE_ERROR.new(config_file, error)
    end

    # True when `file` names an existing regular file.
    def file?(file)
      file && File.file?(file)
    end

    # Default configuration data.
    def defaults
      {
        :num_threads => 1,
        :port        => 4567,
        :host        => '0.0.0.0',
        :web_dir     => Dir.pwd
      }
    end

    # Path used when the caller does not supply :config_file. The `~` is
    # expanded by the constructor.
    def default_config_file
      '~/.genevalidatorapp.conf'
    end
  end
end
@@ -0,0 +1,114 @@
1
+ require 'find'
2
+ require 'digest/md5'
3
+ require 'forwardable'
4
+
5
module GeneValidatorApp
  # Captures a directory containing FASTA files and BLAST databases.
  #
  # It is important that formatted BLAST database files have the same dirname and
  # basename as the source FASTA for GeneValidatorApp to be able to tell formatted
  # FASTA from unformatted. And that FASTA files be formatted with `parse_seqids`
  # option of `makeblastdb` for sequence retrieval to work.
  #
  # GeneValidatorApp will always place BLAST database files alongside input FASTA,
  # and use `parse_seqids` option of `makeblastdb` to format databases.
  #
  # Each instance carries a `name` (path of the database), a `title` and a
  # lower-cased `type`; its `id` is the MD5 hex digest of `name`. The class
  # itself acts as the registry of known databases, keyed by `id`.
  class Database < Struct.new(:name, :title, :type)
    class << self
      extend Forwardable

      def_delegators GeneValidatorApp, :config, :logger

      # Registry of databases, keyed by id.
      def collection
        @collection ||= {}
      end

      private :collection

      # Registers `database`, replacing any entry with the same id.
      def <<(database)
        collection[database.id] = database
      end

      # Returns an Array with the database for each given id (nil for ids
      # that are not registered). Accepts a single id or an Array of ids.
      def [](ids)
        ids = Array ids
        collection.values_at(*ids)
      end

      def ids
        collection.keys
      end

      def all
        collection.values
      end

      def each(&block)
        all.each(&block)
      end

      # True if a database whose name is `path` has been registered.
      def include?(path)
        collection.include? Digest::MD5.hexdigest path
      end

      def group_by(&block)
        all.group_by(&block)
      end

      def first
        all.first
      end

      # Returns the database named by config[:default_db] when it is
      # registered, the first registered database otherwise.
      def default_db
        if config[:default_db] && Database.include?(config[:default_db])
          all.find { |a| a.name == config[:default_db] }
        else
          all.first
        end
      end

      def non_default_dbs
        all.find_all { |a| a != Database.default_db }
      end

      # Returns every registered database whose title equals `db_title`.
      def obtain_original_structure(db_title)
        all.find_all { |a| a.title.chomp == db_title }
      end

      # Recursively scan `database_dir` for BLAST databases and register the
      # protein ones.
      #
      # Each line of `blastdbcmd` output is "<type> <path> <title>". The
      # title may contain spaces, so the line is split into at most three
      # fields. Lines that do not have at least a type and a path (blank
      # lines, diagnostics merged in from stderr) are skipped.
      def scan_databases_dir
        database_dir = config[:database_dir]
        list = list_blast_databases(database_dir)
        list.each_line do |line|
          type, name, title = line.split(' ', 3)
          next unless type && name
          next if multipart_database_name?(name)
          next unless type.downcase == 'protein' # to ensure we only have protein dbs
          self << Database.new(name, title.to_s.strip, type)
        end
      end

      # Returns true if the database name appears to be a multi-part database name.
      #
      # e.g.
      # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
      # /home/ben/pd.ben/sequenceserver/db/nr => no
      # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
      #
      # Only a trailing ".NN" volume suffix counts; a name that merely ends
      # in digits (e.g. proteins_v10) is an ordinary database.
      def multipart_database_name?(db_name)
        !(db_name =~ %r{.+/\S+\.\d{2,3}\z}).nil?
      end

      private

      # Runs `blastdbcmd` and returns its combined stdout/stderr as a String.
      #
      # The command is passed as an argument vector, so `database_dir` is
      # never interpreted by a shell. Returns '' if the program cannot be
      # started.
      def list_blast_databases(database_dir)
        command = ['blastdbcmd', '-recursive', '-list', database_dir.to_s,
                   '-list_outfmt', '%p %f %t']
        IO.popen(command, :err => [:child, :out], &:read)
      rescue SystemCallError => error
        logger.debug("Unable to run blastdbcmd: #{error.message}")
        ''
      end
    end

    # args - name, title and type, in that order.
    #
    # The members are stored frozen and the last one (type) is lower-cased.
    # Private copies are taken first so the caller's own strings are neither
    # mutated nor frozen, and frozen string literals are accepted.
    def initialize(*args)
      members = args.map { |value| value.frozen? ? value : value.dup }
      members[-1] = members.last.downcase if members.last.respond_to?(:downcase)
      members.each(&:freeze)
      super(*members)

      @id = Digest::MD5.hexdigest members.first.to_s
    end

    attr_reader :id

    def to_s
      "#{type}: #{title} #{name}"
    end
  end
end
@@ -0,0 +1,241 @@
1
+ require 'forwardable'
2
+ require 'bio'
3
+ require 'fileutils'
4
+ require 'genevalidator'
5
+
6
module GeneValidatorApp
  # Module that runs GeneValidator
  #
  # State for the current run (ids, directories, params) is held in instance
  # variables of the module itself, so `init` must be called before `run`.
  module RunGeneValidator
    # To signal error in query sequence or options.
    #
    # The superclass is named with a leading `::` so that evaluating this
    # file a second time does not resolve it to this very class.
    class ArgumentError < ::ArgumentError
    end

    # To signal internal errors.
    #
    # RuntimeError is raised when there is a problem in writing the input file,
    # in running BLAST, get_raw_sequence or genevalidator. These are rare,
    # infrastructure errors, used internally, and of concern only to the
    # admins/developers.
    class RuntimeError < ::RuntimeError
    end

    class << self
      extend Forwardable

      def_delegators GeneValidatorApp, :config, :logger

      attr_reader :gv_dir, :tmp_gv_dir, :input_file, :xml_file, :raw_seq,
                  :unique_id, :params

      # Setting the scene
      #
      # url    - URL the request was sent to; used to build the results link.
      # params - request parameters; :seq, :validations and :database are
      #          required.
      #
      # Raises ArgumentError if a required parameter is missing, the sequence
      # is too long, or the database is unknown.
      def init(url, params)
        create_unique_id
        create_subdir_in_main_tmpdir
        create_soft_link_from_tmpdir_to_GV_dir
        @params = params
        validate_params
        obtain_db_path
        @url = produce_result_url_link(url)
      end

      # Writes the query to disk and runs GeneValidator on it.
      # Returns html for just the table or a link to the page produced by GV
      def run
        write_seq_to_file
        run_genevalidator
        @params[:result_link] ? @url : produce_table_html
      end

      private

      # Creates a unique run ID (based on time) and the matching temp dir
      # path. Returns the ID.
      def create_unique_id
        @unique_id = generate_unique_id
        @gv_tmpdir = GeneValidatorApp.tempdir + @unique_id
        ensure_unique_id
        @unique_id
      end

      # Ensures that the Unique id is unique (if a sub dir is present in the
      # temp dir with the unique id, it simply creates a new one)
      def ensure_unique_id
        while File.exist?(@gv_tmpdir)
          @unique_id = generate_unique_id
          @gv_tmpdir = GeneValidatorApp.tempdir + @unique_id
        end
        logger.debug("Unique ID = #{@unique_id}")
      end

      # Timestamp (down to nanoseconds) used as the run ID.
      def generate_unique_id
        Time.new.strftime('%Y-%m-%d_%H-%M-%S_%L-%N')
      end

      # Create a sub_dir in the Tempdir (name is based on unique id)
      def create_subdir_in_main_tmpdir
        logger.debug("GV Tempdir = #{@gv_tmpdir}")
        FileUtils.mkdir_p(@gv_tmpdir)
      end

      # Creates a soft link to the run's temp dir inside the public
      # GeneValidator directory.
      def create_soft_link_from_tmpdir_to_GV_dir
        @gv_dir = GeneValidatorApp.public_dir + 'GeneValidator' + @unique_id
        logger.debug("Local GV dir = #{@gv_dir}")
        FileUtils.ln_s @gv_tmpdir.to_s, @gv_dir.to_s
      end

      # Validates the parameters provided via the app.
      # Only important if POST request is sent via API - Web APP also validates
      # all params via Javascript.
      def validate_params
        check_seq_param_present
        check_seq_length
        check_validations_param_present
        check_database_params_present
      end

      # Simply asserts that the seq param is present
      def check_seq_param_present
        fail ArgumentError, 'No input sequence provided.' unless @params[:seq]
      end

      # Rejects the sequence unless it is shorter than config[:max_characters]
      # (no limit when that setting is absent).
      def check_seq_length
        return unless config[:max_characters]
        return if @params[:seq].length < config[:max_characters]
        fail ArgumentError, 'The input sequence is too long.'
      end

      # Asserts whether the validations param are specified
      def check_validations_param_present
        fail ArgumentError, 'No validations specified' unless @params[:validations]
      end

      # Asserts whether the database parameter is present
      def check_database_params_present
        fail ArgumentError, 'No database specified' unless @params[:database]
      end

      # Resolves the requested database title to the path of the database.
      # When several databases share the title the last one wins.
      #
      # Raises ArgumentError when no database has that title; otherwise the
      # run would proceed with no database, or with the one left over from a
      # previous request.
      def obtain_db_path
        matches = Database.obtain_original_structure(@params[:database])
        fail ArgumentError, 'Database not found' if matches.empty?
        @db = matches.last.name
      end

      # Writes the input sequences to a file with the sub_dir in the temp_dir
      def write_seq_to_file
        @input_fasta_file = @gv_tmpdir + 'input_file.fa'
        logger.debug("Writing input seqs to: '#{@input_fasta_file}'")
        ensure_unix_line_ending
        ensure_fasta_valid
        File.open(@input_fasta_file, 'w+') do |f|
          f.write(@params[:seq])
        end
        assert_input_file_present
      end

      # Converts CRLF and bare CR line endings to LF. Assigns a new string
      # rather than editing in place, so a frozen :seq value is accepted.
      def ensure_unix_line_ending
        @params[:seq] = @params[:seq].gsub(/\r\n?/, "\n")
      end

      # Adds a ID (based on the time when submitted) to sequences that are not
      # in fasta format, and makes repeated sequence IDs unique by appending
      # `_1`, `_2`, ... to the second and later occurrences.
      def ensure_fasta_valid
        logger.debug('Adding an ID to sequences that are not in fasta format.')
        unique_queries = {}
        sequence = @params[:seq].lstrip
        unless sequence.start_with?('>')
          sequence = ">Submitted:#{Time.now.strftime('%H:%M-%B_%d_%Y')}\n" + sequence
        end
        @params[:seq] = sequence.gsub(/^>(\S+)/) do |s|
          if unique_queries.key?(s)
            unique_queries[s] += 1
            s + '_' + (unique_queries[s] - 1).to_s
          else
            unique_queries[s] = 1
            s
          end
        end
      end

      # Asserts that the input file has been generated and is not empty.
      def assert_input_file_present
        return if File.exist?(@input_fasta_file) && !File.zero?(@input_fasta_file)
        fail RuntimeError, 'GeneValidatorApp was unable to create the input' \
                           ' file.'
      end

      # Returns 'blastp' if sequence contains amino acids or returns 'blastx'
      # if it contains nucleic acids.
      def get_blast_type(sequences)
        (check_seq_type(sequences) == Bio::Sequence::AA) ? 'blastp' : 'blastx'
      end

      def check_seq_type(sequences)
        Bio::Sequence.new(Bio::FastaFormat.new(sequences).seq).guess(0.9)
      end

      # Runs GeneValidator
      #
      # Raises RuntimeError if GeneValidator exits or the table file is
      # missing afterwards.
      def run_genevalidator
        opts = set_up_gv_opts
        logger.debug("Running GeneValidator with options: #{opts}")
        create_gv_log_file
        run_gv(opts)
        assert_table_output_file_produced
      rescue SystemExit
        raise RuntimeError, 'GeneValidator failed to run properly'
      end

      # Runs the validation with `opts`. Unless the logger is in debug mode,
      # everything written to stdout meanwhile goes to the run's log file.
      def run_gv(opts = set_up_gv_opts)
        with_stdout_sent_to_log_file do
          GeneValidator::Validation.new(opts, 1, true, true).run
        end
      end

      # Yields with stdout pointed at @gv_log_file (or untouched in debug
      # mode). The original stream is restored in `ensure`, so it comes back
      # even when the block raises or calls `exit`, and the temporary
      # duplicate is closed afterwards.
      def with_stdout_sent_to_log_file
        return yield if logger.debug?

        saved_stdout = $stdout.dup
        begin
          $stdout.reopen(@gv_log_file, 'w')
          yield
        ensure
          $stdout.flush
          $stdout.reopen(saved_stdout)
          saved_stdout.close
        end
      end

      # Options handed to GeneValidator::Validation.
      def set_up_gv_opts
        {
          validations: @params[:validations],
          db: @db,
          num_threads: config[:num_threads],
          fast: true,
          input_fasta_file: @input_fasta_file.to_s
        }
      end

      # Works out the path of the run's log file (the file itself is created
      # when stdout is redirected to it).
      def create_gv_log_file
        @gv_log_file = (@gv_tmpdir + 'log_file.txt').to_s
        logger.debug("Log file: #{@gv_log_file}")
      end

      # Asserts whether the results file is produced by GeneValidator.
      def assert_table_output_file_produced
        @table_file = @gv_dir + 'input_file.fa.html/files/table.html'
        return if File.exist?(@table_file)
        fail RuntimeError, 'GeneValidator did not produce the required' \
                           ' output file.'
      end

      # Reads the GV output table file.
      # Updates links to the plots with relative links to plot jsons.
      #
      # The plot path is replaced as a literal string, not as a pattern, so
      # its `.` only matches a dot.
      def produce_table_html
        orig_plots_dir = 'files/json/input_file.fa_'
        local_plots_dir = Pathname.new('GeneValidator') + @unique_id +
                          'input_file.fa.html/files/json/input_file.fa_'
        full_html = IO.binread(@table_file)
        full_html.gsub(orig_plots_dir, local_plots_dir.to_s).gsub(
          '#Place_external_results_link_here', @url)
      end

      # Returns the URL of the results page.
      #
      # Only a trailing `/input` path segment (and trailing slashes) is
      # removed from `url`; "input" elsewhere, e.g. in the host name, is kept.
      def produce_result_url_link(url)
        url.sub(%r{/input/*\z}, '').sub(%r{/*\z}, '') +
          "/GeneValidator/#{@unique_id}/input_file.fa.html/results.html"
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'logger'
2
+
3
module GeneValidatorApp
  # Application logger: a standard library Logger with a Sinatra-like line
  # format and a simple verbose switch.
  #
  # The superclass is written as `::Logger` so that it always means the
  # standard library class, even if this file is evaluated again after
  # GeneValidatorApp::Logger already exists.
  class Logger < ::Logger
    # dev     - log device, passed straight to ::Logger.
    # verbose - when true the level is DEBUG, otherwise INFO.
    def initialize(dev, verbose = false)
      super dev
      self.level = verbose ? DEBUG : INFO
      self.formatter = Formatter.new
    end

    # We change Logging format so that it is consistent with Sinatra's
    class Formatter < ::Logger::Formatter
      # Timestamp, severity, message.
      Format = "[%s] %s %s\n"

      def initialize
        super()
        self.datetime_format = '%Y-%m-%d %H:%M:%S'
      end

      # Returns the formatted log line; the program name is not included.
      def call(severity, time, _progname, msg)
        Format % [format_datetime(time), severity, msg2str(msg)]
      end
    end
  end
end