sequenceserver 1.1.0.beta11 → 1.1.0.beta12
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sequenceserver might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.rubocop.yml +14 -9
- data/Rakefile +1 -1
- data/bin/sequenceserver +79 -89
- data/lib/sequenceserver.rb +21 -18
- data/lib/sequenceserver/api_errors.rb +10 -11
- data/lib/sequenceserver/blast/constants.rb +6 -6
- data/lib/sequenceserver/blast/formatter.rb +3 -4
- data/lib/sequenceserver/blast/hit.rb +5 -2
- data/lib/sequenceserver/blast/hsp.rb +8 -5
- data/lib/sequenceserver/blast/job.rb +14 -13
- data/lib/sequenceserver/blast/query.rb +5 -3
- data/lib/sequenceserver/blast/report.rb +25 -23
- data/lib/sequenceserver/config.rb +26 -6
- data/lib/sequenceserver/database.rb +4 -3
- data/lib/sequenceserver/doctor.rb +22 -22
- data/lib/sequenceserver/exceptions.rb +17 -24
- data/lib/sequenceserver/job.rb +7 -7
- data/lib/sequenceserver/links.rb +11 -12
- data/lib/sequenceserver/logger.rb +1 -1
- data/lib/sequenceserver/pool.rb +0 -2
- data/lib/sequenceserver/report.rb +0 -1
- data/lib/sequenceserver/routes.rb +16 -16
- data/lib/sequenceserver/sequence.rb +24 -20
- data/lib/sequenceserver/server.rb +8 -8
- data/lib/sequenceserver/version.rb +1 -1
- data/public/js/search.js +2 -12
- data/public/sequenceserver-search.min.js +2 -2
- data/sequenceserver.gemspec +18 -17
- metadata +55 -55
@@ -1,5 +1,4 @@
|
|
1
1
|
module SequenceServer
|
2
|
-
|
3
2
|
# API errors have an http status, title, message, and additional information
|
4
3
|
# like stacktrace or information from program output.
|
5
4
|
class APIError < StandardError
|
@@ -38,10 +37,10 @@ module SequenceServer
|
|
38
37
|
end
|
39
38
|
|
40
39
|
def message
|
41
|
-
|
42
|
-
Looks like there's a problem with one of the query sequences, selected
|
43
|
-
databases, or advanced parameters.
|
44
|
-
MSG
|
40
|
+
<<~MSG
|
41
|
+
Looks like there's a problem with one of the query sequences, selected
|
42
|
+
databases, or advanced parameters.
|
43
|
+
MSG
|
45
44
|
end
|
46
45
|
|
47
46
|
attr_reader :more_info
|
@@ -63,12 +62,12 @@ MSG
|
|
63
62
|
end
|
64
63
|
|
65
64
|
def message
|
66
|
-
|
67
|
-
Looks like there is a problem with the server. Try visiting the page again
|
68
|
-
after a while. If this message persists, please report the problem on our
|
69
|
-
<a href="https://github.com/wurmlab/sequenceserver/issues" target="_blank">
|
70
|
-
issue tracker</a>.
|
71
|
-
MSG
|
65
|
+
<<~MSG
|
66
|
+
Looks like there is a problem with the server. Try visiting the page again
|
67
|
+
after a while. If this message persists, please report the problem on our
|
68
|
+
<a href="https://github.com/wurmlab/sequenceserver/issues" target="_blank">
|
69
|
+
issue tracker</a>.
|
70
|
+
MSG
|
72
71
|
end
|
73
72
|
|
74
73
|
attr_reader :more_info
|
@@ -4,18 +4,18 @@ module SequenceServer
|
|
4
4
|
module BLAST
|
5
5
|
ERROR_LINE = /Error:\s(.*)/
|
6
6
|
|
7
|
-
ALGORITHMS = %w
|
7
|
+
ALGORITHMS = %w[blastn blastp blastx tblastn tblastx].freeze
|
8
8
|
|
9
|
-
OUTFMT_SPECIFIERS = %w
|
9
|
+
OUTFMT_SPECIFIERS = %w[qseqid qgi qacc sseqid sallseqid sgi sallgi sacc
|
10
10
|
sallacc qstart qend sstart send qseq sseq evalue
|
11
11
|
bitscore score length length pident nident
|
12
12
|
mismatch positive gapopen gaps ppos frames
|
13
13
|
qframe hframe btop staxids sscinames scomnames
|
14
14
|
sblastnames sskingdoms stitle salltitles sstrand
|
15
|
-
qcovs qcovhsp
|
15
|
+
qcovs qcovhsp].join(' ').freeze
|
16
16
|
|
17
|
-
STDREP = %w
|
18
|
-
sstart send evalue bitscore qcovs qcovhsp
|
17
|
+
STDREP = %w[qseqid sseqid sscinames pident length mismatch gapopen qstart
|
18
|
+
qend sstart send evalue bitscore qcovs qcovhsp].join(' ').freeze
|
19
19
|
OUTFMT = {
|
20
20
|
'pairwise' => [0, :txt],
|
21
21
|
'qa' => [1, :txt],
|
@@ -30,7 +30,7 @@ module SequenceServer
|
|
30
30
|
'asn_binary' => [9, :asn],
|
31
31
|
'csv' => [10, :csv],
|
32
32
|
'archive' => [11, :txt]
|
33
|
-
}
|
33
|
+
}.freeze
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
@@ -9,7 +9,7 @@ module SequenceServer
|
|
9
9
|
# return saved output.
|
10
10
|
class Formatter
|
11
11
|
class << self
|
12
|
-
|
12
|
+
alias run new
|
13
13
|
end
|
14
14
|
|
15
15
|
extend Forwardable
|
@@ -41,13 +41,12 @@ module SequenceServer
|
|
41
41
|
return if File.exist?(file)
|
42
42
|
command = "blast_formatter -archive '#{job.stdout}'" \
|
43
43
|
" -outfmt '#{format} #{specifiers}'"
|
44
|
-
sys(command, path: config[:bin], dir: DOTDIR,
|
45
|
-
stdout: file)
|
44
|
+
sys(command, path: config[:bin], dir: DOTDIR, stdout: file)
|
46
45
|
rescue CommandFailed => e
|
47
46
|
# Mostly we will never get here: empty archive file,
|
48
47
|
# file permissions, broken BLAST binaries, etc. will
|
49
48
|
# have been caught before reaching here.
|
50
|
-
|
49
|
+
raise SystemError, e.stderr
|
51
50
|
end
|
52
51
|
end
|
53
52
|
end
|
@@ -59,8 +59,11 @@ module SequenceServer
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def to_json(*args)
|
62
|
-
[
|
63
|
-
|
62
|
+
%i[number id accession title length score identity qcovs
|
63
|
+
sciname evalue hsps links].inject({}) { |h, k|
|
64
|
+
h[k] = send(k)
|
65
|
+
h
|
66
|
+
}.to_json(*args)
|
64
67
|
end
|
65
68
|
|
66
69
|
private
|
@@ -5,8 +5,8 @@ module SequenceServer
|
|
5
5
|
HSP = Struct.new(:hit, :number, :bit_score, :score, :evalue, :qstart, :qend,
|
6
6
|
:sstart, :send, :qframe, :sframe, :identity, :positives,
|
7
7
|
:gaps, :length, :qcovhsp, :qseq, :sseq, :midline) do
|
8
|
-
INTEGER_ARGS = [1, 3].concat((5..15).to_a)
|
9
|
-
FLOAT_ARGS = [2, 4]
|
8
|
+
INTEGER_ARGS = [1, 3].concat((5..15).to_a).freeze
|
9
|
+
FLOAT_ARGS = [2, 4].freeze
|
10
10
|
|
11
11
|
def initialize(*args)
|
12
12
|
INTEGER_ARGS.each do |i|
|
@@ -21,9 +21,12 @@ module SequenceServer
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_json(*args)
|
24
|
-
[
|
25
|
-
|
26
|
-
|
24
|
+
%i[number bit_score score evalue qstart qend
|
25
|
+
sstart send qframe sframe identity positives
|
26
|
+
gaps length qcovhsp qseq sseq midline].inject({}) { |h, k|
|
27
|
+
h[k] = self[k]
|
28
|
+
h
|
29
|
+
}.to_json(*args)
|
27
30
|
end
|
28
31
|
end
|
29
32
|
end
|
@@ -1,12 +1,10 @@
|
|
1
1
|
require 'sequenceserver/job'
|
2
2
|
|
3
3
|
module SequenceServer
|
4
|
-
|
5
4
|
# BLAST module.
|
6
5
|
module BLAST
|
7
6
|
# Extends SequenceServer::Job to describe a BLAST job.
|
8
7
|
class Job < Job
|
9
|
-
|
10
8
|
def initialize(params)
|
11
9
|
validate params
|
12
10
|
super do
|
@@ -33,9 +31,11 @@ module SequenceServer
|
|
33
31
|
|
34
32
|
# Override Job#raise! to raise specific API errors based on exitstatus
|
35
33
|
# and using contents of stderr to provide context about the error.
|
34
|
+
#
|
35
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
36
36
|
def raise!
|
37
37
|
# Return true if exit status is 0 and stdout is not empty.
|
38
|
-
return true if exitstatus
|
38
|
+
return true if exitstatus.zero? && !File.zero?(stdout)
|
39
39
|
|
40
40
|
# Handle error. See [1].
|
41
41
|
case exitstatus
|
@@ -47,16 +47,16 @@ module SequenceServer
|
|
47
47
|
when 4
|
48
48
|
# Out of memory. User can retry with a shorter search, so raising
|
49
49
|
# InputError here instead of SystemError.
|
50
|
-
fail InputError,
|
51
|
-
Ran out of memory. Please try a smaller query, or searching fewer and smaller
|
52
|
-
databases, or limiting the output by using advanced options.
|
53
|
-
MSG
|
50
|
+
fail InputError, <<~MSG
|
51
|
+
Ran out of memory. Please try a smaller query, or searching fewer and smaller
|
52
|
+
databases, or limiting the output by using advanced options.
|
53
|
+
MSG
|
54
54
|
when 6
|
55
55
|
# Error creating output files. It can't be a permission issue as that
|
56
56
|
# would have been caught while creating job directory. But we can run
|
57
57
|
# out of storage after creating the job directory and while running
|
58
58
|
# the job. This is a SystemError.
|
59
|
-
fail SystemError,
|
59
|
+
fail SystemError, 'Ran out of disk space.'
|
60
60
|
else
|
61
61
|
# I am not sure what the exit codes 2 & 3 mean and we should not
|
62
62
|
# encounter exit code 5. The only other error that I know can happen
|
@@ -64,16 +64,17 @@ MSG
|
|
64
64
|
# macOS updates. So raise SystemError, include the exit status in the
|
65
65
|
# message, and say that the "most likely" reason is broken BLAST+
|
66
66
|
# binaries.
|
67
|
-
fail SystemError,
|
68
|
-
BLAST failed abruptly (exit status: #{exitstatus}). Most likely there is a
|
69
|
-
problem with the BLAST+ binaries.
|
70
|
-
MSG
|
67
|
+
fail SystemError, <<~MSG
|
68
|
+
BLAST failed abruptly (exit status: #{exitstatus}). Most likely there is a
|
69
|
+
problem with the BLAST+ binaries.
|
70
|
+
MSG
|
71
71
|
end
|
72
72
|
end
|
73
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
73
74
|
|
74
75
|
private
|
75
76
|
|
76
|
-
def parse_advanced
|
77
|
+
def parse_advanced(param_line)
|
77
78
|
param_list = (param_line || '').split(' ')
|
78
79
|
res = {}
|
79
80
|
|
@@ -10,7 +10,7 @@ module SequenceServer
|
|
10
10
|
def initialize(*args)
|
11
11
|
args[1] = args[1].to_i
|
12
12
|
if args[2] == 'No definition line' ||
|
13
|
-
|
13
|
+
args[2] == 'unnamed protein product'
|
14
14
|
args[2] = "Query_#{args[1]}"
|
15
15
|
end
|
16
16
|
args[3] = args[3].to_i
|
@@ -22,8 +22,10 @@ module SequenceServer
|
|
22
22
|
attr_reader :id, :title
|
23
23
|
|
24
24
|
def to_json(*args)
|
25
|
-
[
|
26
|
-
|
25
|
+
%i[number id title length hits].inject({}) do |h, k|
|
26
|
+
h[k] = send(k)
|
27
|
+
h
|
28
|
+
end.to_json(*args)
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'ox'
|
2
|
-
Ox.default_options = {skip: :skip_none}
|
2
|
+
Ox.default_options = { skip: :skip_none }
|
3
3
|
|
4
4
|
require 'sequenceserver/report'
|
5
5
|
require 'sequenceserver/links'
|
@@ -37,8 +37,10 @@ module SequenceServer
|
|
37
37
|
|
38
38
|
def to_json
|
39
39
|
[:querydb, :program, :program_version, :params, :stats,
|
40
|
-
:queries].inject({}) { |h, k|
|
41
|
-
|
40
|
+
:queries].inject({}) { |h, k|
|
41
|
+
h[k] = send(k)
|
42
|
+
h
|
43
|
+
}.update(search_id: job.id, submitted_at: job.submitted_at.utc).to_json
|
42
44
|
end
|
43
45
|
|
44
46
|
private
|
@@ -82,13 +84,13 @@ module SequenceServer
|
|
82
84
|
def extract_stats(ir)
|
83
85
|
stats = ir[8].first[5][0]
|
84
86
|
@stats = {
|
85
|
-
:
|
86
|
-
:
|
87
|
-
:
|
88
|
-
:
|
89
|
-
:
|
90
|
-
:
|
91
|
-
:
|
87
|
+
nsequences: stats[0],
|
88
|
+
ncharacters: stats[1],
|
89
|
+
hsp_length: stats[2],
|
90
|
+
search_space: stats[3],
|
91
|
+
kappa: stats[4],
|
92
|
+
labmda: stats[5],
|
93
|
+
entropy: stats[6]
|
92
94
|
}
|
93
95
|
end
|
94
96
|
|
@@ -115,7 +117,7 @@ module SequenceServer
|
|
115
117
|
n[2] = defline.join(' ')
|
116
118
|
end
|
117
119
|
hit = Hit.new(query, n[0], n[1], n[3], n[2], n[4],
|
118
|
-
tsv_ir[n[1]][0], tsv_ir[n[1]][1],[])
|
120
|
+
tsv_ir[n[1]][0], tsv_ir[n[1]][1], [])
|
119
121
|
extract_hsps(n[5], tsv_ir[n[1]][2], hit)
|
120
122
|
query.hits << hit
|
121
123
|
end
|
@@ -133,23 +135,23 @@ module SequenceServer
|
|
133
135
|
def parse_xml(xml)
|
134
136
|
node_to_array Ox.parse(xml).root
|
135
137
|
rescue Ox::ParseError
|
136
|
-
fail InputError,
|
137
|
-
BLAST generated incorrect XML output. This can happen if sequence ids in your
|
138
|
-
databases are not unique across all files. As a temporary workaround, you can
|
139
|
-
repeat the search with one database at a time. Proper fix is to recreate the
|
140
|
-
following databases with unique sequence ids:
|
138
|
+
fail InputError, <<~MSG
|
139
|
+
BLAST generated incorrect XML output. This can happen if sequence ids in your
|
140
|
+
databases are not unique across all files. As a temporary workaround, you can
|
141
|
+
repeat the search with one database at a time. Proper fix is to recreate the
|
142
|
+
following databases with unique sequence ids:
|
141
143
|
|
142
|
-
|
144
|
+
#{querydb.map(&:title).join(', ')}
|
143
145
|
|
144
|
-
If you are not the one managing this server, try to let the manager know
|
145
|
-
about this.
|
146
|
-
MSG
|
146
|
+
If you are not the one managing this server, try to let the manager know
|
147
|
+
about this.
|
148
|
+
MSG
|
147
149
|
end
|
148
150
|
|
149
|
-
PARSEABLE_AS_HASH = %w
|
150
|
-
PARSEABLE_AS_ARRAY = %w
|
151
|
+
PARSEABLE_AS_HASH = %w[Parameters].freeze
|
152
|
+
PARSEABLE_AS_ARRAY = %w[BlastOutput_param Iteration_stat Statistics
|
151
153
|
Iteration_hits BlastOutput_iterations
|
152
|
-
Iteration Hit Hit_hsps Hsp
|
154
|
+
Iteration Hit Hit_hsps Hsp].freeze
|
153
155
|
|
154
156
|
def node_to_hash(element)
|
155
157
|
Hash[*element.nodes.map { |n| [n.name, node_to_value(n)] }.flatten]
|
@@ -60,6 +60,20 @@ module SequenceServer
|
|
60
60
|
data[:database_dir] ||= database_dir
|
61
61
|
end
|
62
62
|
|
63
|
+
# We would like a persistent :options: key in the config file, explicitly
|
64
|
+
# stating the parameters to use. Recommended values are written to config
|
65
|
+
# file on the first run. However, existing users won't have :options: key
|
66
|
+
# available from before. For them, we retain the old behaviour of
|
67
|
+
# automatically adding `-task blastn` for BLASTN searches.
|
68
|
+
if blast_opts = data.dig(:options, :blastn)
|
69
|
+
unless blast_opts.join.match('-task')
|
70
|
+
# Issue a warning.
|
71
|
+
logger.info "BLASTN will be run using '-task blastn' option." +
|
72
|
+
" You can override this through configuration file."
|
73
|
+
data[:options][:blastn].push '-task blastn'
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
63
77
|
data
|
64
78
|
end
|
65
79
|
|
@@ -67,11 +81,10 @@ module SequenceServer
|
|
67
81
|
# otherwise.
|
68
82
|
def parse_config_file
|
69
83
|
unless file? config_file
|
70
|
-
logger.
|
84
|
+
logger.info "Configuration file not found: #{config_file}"
|
71
85
|
return {}
|
72
86
|
end
|
73
|
-
|
74
|
-
logger.debug "Reading configuration file: #{config_file}."
|
87
|
+
logger.info "Reading configuration file: #{config_file}."
|
75
88
|
normalize YAML.load_file(config_file)
|
76
89
|
rescue => error
|
77
90
|
raise CONFIG_FILE_ERROR.new(config_file, error)
|
@@ -84,9 +97,16 @@ module SequenceServer
|
|
84
97
|
# Default configuration data.
|
85
98
|
def defaults
|
86
99
|
{
|
87
|
-
:
|
88
|
-
:
|
89
|
-
:
|
100
|
+
host: '0.0.0.0',
|
101
|
+
port: 4567,
|
102
|
+
options: {
|
103
|
+
blastn: ['-task blastn', '-evalue 1e-5'],
|
104
|
+
blastp: ['-evalue 1e-5'],
|
105
|
+
blastx: ['-evalue 1e-5'],
|
106
|
+
tblastx: ['-evalue 1e-5'],
|
107
|
+
tblastn: ['-evalue 1e-5']
|
108
|
+
},
|
109
|
+
num_threads: 1
|
90
110
|
}
|
91
111
|
end
|
92
112
|
end
|
@@ -38,7 +38,7 @@ module SequenceServer
|
|
38
38
|
|
39
39
|
def include?(accession)
|
40
40
|
cmd = "blastdbcmd -entry '#{accession}' -db #{name}"
|
41
|
-
out,
|
41
|
+
out, = sys(cmd, path: config[:bin])
|
42
42
|
!out.empty?
|
43
43
|
end
|
44
44
|
|
@@ -159,7 +159,7 @@ module SequenceServer
|
|
159
159
|
next if Database.include? file
|
160
160
|
next unless probably_fasta? file
|
161
161
|
sequence_type = guess_sequence_type_in_fasta file
|
162
|
-
if [
|
162
|
+
if %i[protein nucleotide].include?(sequence_type)
|
163
163
|
list << [file, sequence_type]
|
164
164
|
end
|
165
165
|
end
|
@@ -229,8 +229,9 @@ module SequenceServer
|
|
229
229
|
# /home/ben/pd.ben/sequenceserver/db/nr => no
|
230
230
|
# /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
|
231
231
|
# /home/ben/pd.ben/sequenceserver/db/nr00 => no
|
232
|
+
# /mnt/blast-db/refseq_genomic.100 => yes
|
232
233
|
def multipart_database_name?(db_name)
|
233
|
-
!(db_name.match(%r{.+/\S+\.\d{2}$}).nil?)
|
234
|
+
!(db_name.match(%r{.+/\S+\.\d{2,3}$}).nil?)
|
234
235
|
end
|
235
236
|
|
236
237
|
# Returns true if first character of the file is '>'.
|
@@ -34,8 +34,8 @@ module SequenceServer
|
|
34
34
|
|
35
35
|
out = `blastdbcmd -entry all -db #{db.name} -outfmt "%i" 2> /dev/null`
|
36
36
|
{
|
37
|
-
:
|
38
|
-
:
|
37
|
+
db: db,
|
38
|
+
seqids: out.to_s.split
|
39
39
|
}
|
40
40
|
end.compact
|
41
41
|
end
|
@@ -66,33 +66,33 @@ module SequenceServer
|
|
66
66
|
|
67
67
|
case error
|
68
68
|
when ERROR_PARSE_SEQIDS
|
69
|
-
puts
|
70
|
-
*** Doctor has found improperly formatted database:
|
71
|
-
#{bullet_list(values)}
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
puts <<~MSG
|
70
|
+
*** Doctor has found improperly formatted database:
|
71
|
+
#{bullet_list(values)}
|
72
|
+
Please reformat your databases with -parse_seqids switch (or use
|
73
|
+
sequenceserver -m) for using SequenceServer as the current format
|
74
|
+
may cause problems.
|
75
75
|
|
76
|
-
|
77
|
-
MSG
|
76
|
+
These databases are ignored in further checks.
|
77
|
+
MSG
|
78
78
|
|
79
79
|
when ERROR_NUMERIC_IDS
|
80
|
-
puts
|
81
|
-
*** Doctor has found databases with numeric sequence ids:
|
82
|
-
#{bullet_list(values)}
|
83
|
-
|
84
|
-
MSG
|
80
|
+
puts <<~MSG
|
81
|
+
*** Doctor has found databases with numeric sequence ids:
|
82
|
+
#{bullet_list(values)}
|
83
|
+
Note that this may cause problems with sequence retrieval.
|
84
|
+
MSG
|
85
85
|
|
86
86
|
when ERROR_PROBLEMATIC_IDS
|
87
|
-
puts
|
88
|
-
*** Doctor has found databases with problematic sequence ids:
|
89
|
-
#{bullet_list(values)}
|
90
|
-
|
91
|
-
|
92
|
-
MSG
|
87
|
+
puts <<~MSG
|
88
|
+
*** Doctor has found databases with problematic sequence ids:
|
89
|
+
#{bullet_list(values)}
|
90
|
+
This causes some sequence to contain extraneous words like `gnl|`
|
91
|
+
appended to their id string.
|
92
|
+
MSG
|
93
93
|
end
|
94
94
|
end
|
95
|
-
# rubocop:
|
95
|
+
# rubocop:enable Metrics/MethodLength
|
96
96
|
end
|
97
97
|
|
98
98
|
def initialize
|