sequenceserver 1.1.0.beta11 → 1.1.0.beta12

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,4 @@
1
1
  module SequenceServer
2
-
3
2
  # API errors have an http status, title, message, and additional information
4
3
  # like stacktrace or information from program output.
5
4
  class APIError < StandardError
@@ -38,10 +37,10 @@ module SequenceServer
38
37
  end
39
38
 
40
39
  def message
41
- <<MSG
42
- Looks like there's a problem with one of the query sequences, selected
43
- databases, or advanced parameters.
44
- MSG
40
+ <<~MSG
41
+ Looks like there's a problem with one of the query sequences, selected
42
+ databases, or advanced parameters.
43
+ MSG
45
44
  end
46
45
 
47
46
  attr_reader :more_info
@@ -63,12 +62,12 @@ MSG
63
62
  end
64
63
 
65
64
  def message
66
- <<MSG
67
- Looks like there is a problem with the server. Try visiting the page again
68
- after a while. If this message persists, please report the problem on our
69
- <a href="https://github.com/wurmlab/sequenceserver/issues" target="_blank">
70
- issue tracker</a>.
71
- MSG
65
+ <<~MSG
66
+ Looks like there is a problem with the server. Try visiting the page again
67
+ after a while. If this message persists, please report the problem on our
68
+ <a href="https://github.com/wurmlab/sequenceserver/issues" target="_blank">
69
+ issue tracker</a>.
70
+ MSG
72
71
  end
73
72
 
74
73
  attr_reader :more_info
@@ -4,18 +4,18 @@ module SequenceServer
4
4
  module BLAST
5
5
  ERROR_LINE = /Error:\s(.*)/
6
6
 
7
- ALGORITHMS = %w(blastn blastp blastx tblastn tblastx)
7
+ ALGORITHMS = %w[blastn blastp blastx tblastn tblastx].freeze
8
8
 
9
- OUTFMT_SPECIFIERS = %w(qseqid qgi qacc sseqid sallseqid sgi sallgi sacc
9
+ OUTFMT_SPECIFIERS = %w[qseqid qgi qacc sseqid sallseqid sgi sallgi sacc
10
10
  sallacc qstart qend sstart send qseq sseq evalue
11
11
  bitscore score length length pident nident
12
12
  mismatch positive gapopen gaps ppos frames
13
13
  qframe hframe btop staxids sscinames scomnames
14
14
  sblastnames sskingdoms stitle salltitles sstrand
15
- qcovs qcovhsp).join(' ')
15
+ qcovs qcovhsp].join(' ').freeze
16
16
 
17
- STDREP = %w(qseqid sseqid sscinames pident length mismatch gapopen qstart qend
18
- sstart send evalue bitscore qcovs qcovhsp).join(' ')
17
+ STDREP = %w[qseqid sseqid sscinames pident length mismatch gapopen qstart
18
+ qend sstart send evalue bitscore qcovs qcovhsp].join(' ').freeze
19
19
  OUTFMT = {
20
20
  'pairwise' => [0, :txt],
21
21
  'qa' => [1, :txt],
@@ -30,7 +30,7 @@ module SequenceServer
30
30
  'asn_binary' => [9, :asn],
31
31
  'csv' => [10, :csv],
32
32
  'archive' => [11, :txt]
33
- }
33
+ }.freeze
34
34
  end
35
35
  end
36
36
 
@@ -9,7 +9,7 @@ module SequenceServer
9
9
  # return saved output.
10
10
  class Formatter
11
11
  class << self
12
- alias_method :run, :new
12
+ alias run new
13
13
  end
14
14
 
15
15
  extend Forwardable
@@ -41,13 +41,12 @@ module SequenceServer
41
41
  return if File.exist?(file)
42
42
  command = "blast_formatter -archive '#{job.stdout}'" \
43
43
  " -outfmt '#{format} #{specifiers}'"
44
- sys(command, path: config[:bin], dir: DOTDIR,
45
- stdout: file)
44
+ sys(command, path: config[:bin], dir: DOTDIR, stdout: file)
46
45
  rescue CommandFailed => e
47
46
  # Mostly we will never get here: empty archive file,
48
47
  # file permissions, broken BLAST binaries, etc. will
49
48
  # have been caught before reaching here.
50
- fail SystemError, e.stderr
49
+ raise SystemError, e.stderr
51
50
  end
52
51
  end
53
52
  end
@@ -59,8 +59,11 @@ module SequenceServer
59
59
  end
60
60
 
61
61
  def to_json(*args)
62
- [:number, :id, :accession, :title, :length, :score, :identity, :qcovs,
63
- :sciname, :evalue, :hsps, :links].inject({}) { |h, k| h[k] = send(k); h }.to_json(*args)
62
+ %i[number id accession title length score identity qcovs
63
+ sciname evalue hsps links].inject({}) { |h, k|
64
+ h[k] = send(k)
65
+ h
66
+ }.to_json(*args)
64
67
  end
65
68
 
66
69
  private
@@ -5,8 +5,8 @@ module SequenceServer
5
5
  HSP = Struct.new(:hit, :number, :bit_score, :score, :evalue, :qstart, :qend,
6
6
  :sstart, :send, :qframe, :sframe, :identity, :positives,
7
7
  :gaps, :length, :qcovhsp, :qseq, :sseq, :midline) do
8
- INTEGER_ARGS = [1, 3].concat((5..15).to_a)
9
- FLOAT_ARGS = [2, 4]
8
+ INTEGER_ARGS = [1, 3].concat((5..15).to_a).freeze
9
+ FLOAT_ARGS = [2, 4].freeze
10
10
 
11
11
  def initialize(*args)
12
12
  INTEGER_ARGS.each do |i|
@@ -21,9 +21,12 @@ module SequenceServer
21
21
  end
22
22
 
23
23
  def to_json(*args)
24
- [:number, :bit_score, :score, :evalue, :qstart, :qend,
25
- :sstart, :send, :qframe, :sframe, :identity, :positives,
26
- :gaps, :length, :qcovhsp, :qseq, :sseq, :midline].inject({}) { |h, k| h[k] = self[k]; h }.to_json(*args)
24
+ %i[number bit_score score evalue qstart qend
25
+ sstart send qframe sframe identity positives
26
+ gaps length qcovhsp qseq sseq midline].inject({}) { |h, k|
27
+ h[k] = self[k]
28
+ h
29
+ }.to_json(*args)
27
30
  end
28
31
  end
29
32
  end
@@ -1,12 +1,10 @@
1
1
  require 'sequenceserver/job'
2
2
 
3
3
  module SequenceServer
4
-
5
4
  # BLAST module.
6
5
  module BLAST
7
6
  # Extends SequenceServer::Job to describe a BLAST job.
8
7
  class Job < Job
9
-
10
8
  def initialize(params)
11
9
  validate params
12
10
  super do
@@ -33,9 +31,11 @@ module SequenceServer
33
31
 
34
32
  # Override Job#raise! to raise specific API errors based on exitstatus
35
33
  # and using contents of stderr to provide context about the error.
34
+ #
35
+ # rubocop:disable Metrics/CyclomaticComplexity
36
36
  def raise!
37
37
  # Return true if exit status is 0 and stdout is not empty.
38
- return true if exitstatus == 0 && !File.zero?(stdout)
38
+ return true if exitstatus.zero? && !File.zero?(stdout)
39
39
 
40
40
  # Handle error. See [1].
41
41
  case exitstatus
@@ -47,16 +47,16 @@ module SequenceServer
47
47
  when 4
48
48
  # Out of memory. User can retry with a shorter search, so raising
49
49
  # InputError here instead of SystemError.
50
- fail InputError, <<MSG
51
- Ran out of memory. Please try a smaller query, or searching fewer and smaller
52
- databases, or limiting the output by using advanced options.
53
- MSG
50
+ fail InputError, <<~MSG
51
+ Ran out of memory. Please try a smaller query, or searching fewer and smaller
52
+ databases, or limiting the output by using advanced options.
53
+ MSG
54
54
  when 6
55
55
  # Error creating output files. It can't be a permission issue as that
56
56
  # would have been caught while creating job directory. But we can run
57
57
  # out of storage after creating the job directory and while running
58
58
  # the job. This is a SystemError.
59
- fail SystemError, "Ran out of disk space."
59
+ fail SystemError, 'Ran out of disk space.'
60
60
  else
61
61
  # I am not sure what the exit codes 2 & 3 mean and we should not
62
62
  # encounter exit code 5. The only other error that I know can happen
@@ -64,16 +64,17 @@ MSG
64
64
  # macOS updates. So raise SystemError, include the exit status in the
65
65
  # message, and say that the "most likely" reason is broken BLAST+
66
66
  # binaries.
67
- fail SystemError, <<MSG
68
- BLAST failed abruptly (exit status: #{exitstatus}). Most likely there is a
69
- problem with the BLAST+ binaries.
70
- MSG
67
+ fail SystemError, <<~MSG
68
+ BLAST failed abruptly (exit status: #{exitstatus}). Most likely there is a
69
+ problem with the BLAST+ binaries.
70
+ MSG
71
71
  end
72
72
  end
73
+ # rubocop:enable Metrics/CyclomaticComplexity
73
74
 
74
75
  private
75
76
 
76
- def parse_advanced param_line
77
+ def parse_advanced(param_line)
77
78
  param_list = (param_line || '').split(' ')
78
79
  res = {}
79
80
 
@@ -10,7 +10,7 @@ module SequenceServer
10
10
  def initialize(*args)
11
11
  args[1] = args[1].to_i
12
12
  if args[2] == 'No definition line' ||
13
- args[2] == 'unnamed protein product'
13
+ args[2] == 'unnamed protein product'
14
14
  args[2] = "Query_#{args[1]}"
15
15
  end
16
16
  args[3] = args[3].to_i
@@ -22,8 +22,10 @@ module SequenceServer
22
22
  attr_reader :id, :title
23
23
 
24
24
  def to_json(*args)
25
- [:number, :id, :title, :length, :hits]
26
- .inject({}) { |h, k| h[k] = send(k); h }.to_json(*args)
25
+ %i[number id title length hits].inject({}) do |h, k|
26
+ h[k] = send(k)
27
+ h
28
+ end.to_json(*args)
27
29
  end
28
30
  end
29
31
  end
@@ -1,5 +1,5 @@
1
1
  require 'ox'
2
- Ox.default_options = {skip: :skip_none}
2
+ Ox.default_options = { skip: :skip_none }
3
3
 
4
4
  require 'sequenceserver/report'
5
5
  require 'sequenceserver/links'
@@ -37,8 +37,10 @@ module SequenceServer
37
37
 
38
38
  def to_json
39
39
  [:querydb, :program, :program_version, :params, :stats,
40
- :queries].inject({}) { |h, k| h[k] = send(k); h }.
41
- update(search_id: job.id, submitted_at: job.submitted_at.utc).to_json
40
+ :queries].inject({}) { |h, k|
41
+ h[k] = send(k)
42
+ h
43
+ }.update(search_id: job.id, submitted_at: job.submitted_at.utc).to_json
42
44
  end
43
45
 
44
46
  private
@@ -82,13 +84,13 @@ module SequenceServer
82
84
  def extract_stats(ir)
83
85
  stats = ir[8].first[5][0]
84
86
  @stats = {
85
- :nsequences => stats[0],
86
- :ncharacters => stats[1],
87
- :hsp_length => stats[2],
88
- :search_space => stats[3],
89
- :kappa => stats[4],
90
- :labmda => stats[5],
91
- :entropy => stats[6]
87
+ nsequences: stats[0],
88
+ ncharacters: stats[1],
89
+ hsp_length: stats[2],
90
+ search_space: stats[3],
91
+ kappa: stats[4],
92
+ labmda: stats[5],
93
+ entropy: stats[6]
92
94
  }
93
95
  end
94
96
 
@@ -115,7 +117,7 @@ module SequenceServer
115
117
  n[2] = defline.join(' ')
116
118
  end
117
119
  hit = Hit.new(query, n[0], n[1], n[3], n[2], n[4],
118
- tsv_ir[n[1]][0], tsv_ir[n[1]][1],[])
120
+ tsv_ir[n[1]][0], tsv_ir[n[1]][1], [])
119
121
  extract_hsps(n[5], tsv_ir[n[1]][2], hit)
120
122
  query.hits << hit
121
123
  end
@@ -133,23 +135,23 @@ module SequenceServer
133
135
  def parse_xml(xml)
134
136
  node_to_array Ox.parse(xml).root
135
137
  rescue Ox::ParseError
136
- fail InputError, <<MSG
137
- BLAST generated incorrect XML output. This can happen if sequence ids in your
138
- databases are not unique across all files. As a temporary workaround, you can
139
- repeat the search with one database at a time. Proper fix is to recreate the
140
- following databases with unique sequence ids:
138
+ fail InputError, <<~MSG
139
+ BLAST generated incorrect XML output. This can happen if sequence ids in your
140
+ databases are not unique across all files. As a temporary workaround, you can
141
+ repeat the search with one database at a time. Proper fix is to recreate the
142
+ following databases with unique sequence ids:
141
143
 
142
- #{querydb.map(&:title).join(', ')}
144
+ #{querydb.map(&:title).join(', ')}
143
145
 
144
- If you are not the one managing this server, try to let the manager know
145
- about this.
146
- MSG
146
+ If you are not the one managing this server, try to let the manager know
147
+ about this.
148
+ MSG
147
149
  end
148
150
 
149
- PARSEABLE_AS_HASH = %w(Parameters)
150
- PARSEABLE_AS_ARRAY = %w(BlastOutput_param Iteration_stat Statistics
151
+ PARSEABLE_AS_HASH = %w[Parameters].freeze
152
+ PARSEABLE_AS_ARRAY = %w[BlastOutput_param Iteration_stat Statistics
151
153
  Iteration_hits BlastOutput_iterations
152
- Iteration Hit Hit_hsps Hsp)
154
+ Iteration Hit Hit_hsps Hsp].freeze
153
155
 
154
156
  def node_to_hash(element)
155
157
  Hash[*element.nodes.map { |n| [n.name, node_to_value(n)] }.flatten]
@@ -60,6 +60,20 @@ module SequenceServer
60
60
  data[:database_dir] ||= database_dir
61
61
  end
62
62
 
63
+ # We would like a persistent :options: key in the config file, explicitly
64
+ # stating the parameters to use. Recommended values are written to config
65
+ # file on the first run. However, existing users won't have :options: key
66
+ # available from before. For them, we retain the old behaviour of
67
+ # automatically adding `-task blastn` for BLASTN searches.
68
+ if blast_opts = data.dig(:options, :blastn)
69
+ unless blast_opts.join.match('-task')
70
+ # Issue a warning.
71
+ logger.info "BLASTN will be run using '-task blastn' option." +
72
+ " You can override this through configuration file."
73
+ data[:options][:blastn].push '-task blastn'
74
+ end
75
+ end
76
+
63
77
  data
64
78
  end
65
79
 
@@ -67,11 +81,10 @@ module SequenceServer
67
81
  # otherwise.
68
82
  def parse_config_file
69
83
  unless file? config_file
70
- logger.debug "Configuration file not found: #{config_file}"
84
+ logger.info "Configuration file not found: #{config_file}"
71
85
  return {}
72
86
  end
73
-
74
- logger.debug "Reading configuration file: #{config_file}."
87
+ logger.info "Reading configuration file: #{config_file}."
75
88
  normalize YAML.load_file(config_file)
76
89
  rescue => error
77
90
  raise CONFIG_FILE_ERROR.new(config_file, error)
@@ -84,9 +97,16 @@ module SequenceServer
84
97
  # Default configuration data.
85
98
  def defaults
86
99
  {
87
- :num_threads => 1,
88
- :port => 4567,
89
- :host => '0.0.0.0'
100
+ host: '0.0.0.0',
101
+ port: 4567,
102
+ options: {
103
+ blastn: ['-task blastn', '-evalue 1e-5'],
104
+ blastp: ['-evalue 1e-5'],
105
+ blastx: ['-evalue 1e-5'],
106
+ tblastx: ['-evalue 1e-5'],
107
+ tblastn: ['-evalue 1e-5']
108
+ },
109
+ num_threads: 1
90
110
  }
91
111
  end
92
112
  end
@@ -38,7 +38,7 @@ module SequenceServer
38
38
 
39
39
  def include?(accession)
40
40
  cmd = "blastdbcmd -entry '#{accession}' -db #{name}"
41
- out, _ = sys(cmd, path: config[:bin])
41
+ out, = sys(cmd, path: config[:bin])
42
42
  !out.empty?
43
43
  end
44
44
 
@@ -159,7 +159,7 @@ module SequenceServer
159
159
  next if Database.include? file
160
160
  next unless probably_fasta? file
161
161
  sequence_type = guess_sequence_type_in_fasta file
162
- if [:protein, :nucleotide].include?(sequence_type)
162
+ if %i[protein nucleotide].include?(sequence_type)
163
163
  list << [file, sequence_type]
164
164
  end
165
165
  end
@@ -229,8 +229,9 @@ module SequenceServer
229
229
  # /home/ben/pd.ben/sequenceserver/db/nr => no
230
230
  # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
231
231
  # /home/ben/pd.ben/sequenceserver/db/nr00 => no
232
+ # /mnt/blast-db/refseq_genomic.100 => yes
232
233
  def multipart_database_name?(db_name)
233
- !(db_name.match(%r{.+/\S+\.\d{2}$}).nil?)
234
+ !(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
234
235
  end
235
236
 
236
237
  # Returns true if first character of the file is '>'.
@@ -34,8 +34,8 @@ module SequenceServer
34
34
 
35
35
  out = `blastdbcmd -entry all -db #{db.name} -outfmt "%i" 2> /dev/null`
36
36
  {
37
- :db => db,
38
- :seqids => out.to_s.split
37
+ db: db,
38
+ seqids: out.to_s.split
39
39
  }
40
40
  end.compact
41
41
  end
@@ -66,33 +66,33 @@ module SequenceServer
66
66
 
67
67
  case error
68
68
  when ERROR_PARSE_SEQIDS
69
- puts <<MSG
70
- *** Doctor has found improperly formatted database:
71
- #{bullet_list(values)}
72
- Please reformat your databases with -parse_seqids switch (or use
73
- sequenceserver -m) for using SequenceServer as the current format
74
- may cause problems.
69
+ puts <<~MSG
70
+ *** Doctor has found improperly formatted database:
71
+ #{bullet_list(values)}
72
+ Please reformat your databases with -parse_seqids switch (or use
73
+ sequenceserver -m) for using SequenceServer as the current format
74
+ may cause problems.
75
75
 
76
- These databases are ignored in further checks.
77
- MSG
76
+ These databases are ignored in further checks.
77
+ MSG
78
78
 
79
79
  when ERROR_NUMERIC_IDS
80
- puts <<MSG
81
- *** Doctor has found databases with numeric sequence ids:
82
- #{bullet_list(values)}
83
- Note that this may cause problems with sequence retrieval.
84
- MSG
80
+ puts <<~MSG
81
+ *** Doctor has found databases with numeric sequence ids:
82
+ #{bullet_list(values)}
83
+ Note that this may cause problems with sequence retrieval.
84
+ MSG
85
85
 
86
86
  when ERROR_PROBLEMATIC_IDS
87
- puts <<MSG
88
- *** Doctor has found databases with problematic sequence ids:
89
- #{bullet_list(values)}
90
- This causes some sequence to contain extraneous words like `gnl|`
91
- appended to their id string.
92
- MSG
87
+ puts <<~MSG
88
+ *** Doctor has found databases with problematic sequence ids:
89
+ #{bullet_list(values)}
90
+ This causes some sequence to contain extraneous words like `gnl|`
91
+ appended to their id string.
92
+ MSG
93
93
  end
94
94
  end
95
- # rubocop:disable Metrics/MethodLength
95
+ # rubocop:enable Metrics/MethodLength
96
96
  end
97
97
 
98
98
  def initialize