sequenceserver 1.0.0.pre.3 → 1.0.0.pre.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,22 +2,17 @@ module SequenceServer
2
2
  # Define BLAST::Hit.
3
3
  module BLAST
4
4
  # Hit Object to store all the hits per Query.
5
- # @member [Fixnum] number
6
- # @member [String] id
7
- # @member [String] def
8
- # @member [String] accession
9
- # @member [Fixnum] len
10
- # @member [HSP] hsp
11
- Hit = Struct.new(:number, :id, :title, :accession, :len, :hsps) do
5
+ Hit = Struct.new(:query, :number, :id, :accession, :title,
6
+ :length, :hsps) do
7
+ include Links
8
+
12
9
  def initialize(*args)
13
- args[0] = args[0].to_i
14
- args[2] = '' if args[2] == 'No definition line'
15
- args[4] = args[4].to_i
10
+ args[1] = args[1].to_i
11
+ args[4] = '' if args[4] == 'No definition line'
12
+ args[5] = args[5].to_i
16
13
  super
17
14
  end
18
15
 
19
- alias_method :length, :len
20
-
21
16
  # Hit evalue is the minimum evalue of all HSP(s).
22
17
  def evalue
23
18
  hsps.map(&:evalue).min
@@ -27,6 +22,32 @@ module SequenceServer
27
22
  def score
28
23
  hsps.map(&:bit_score).reduce(:+)
29
24
  end
25
+
26
+ def links
27
+ links = Links.instance_methods.map { |m| send m }
28
+ links.compact!
29
+ links.sort_by! { |link| link[:order] }
30
+ end
31
+
32
+ # Returns an array of database objects which contain the queried sequence
33
+ # id.
34
+ #
35
+ # NOTE:
36
+ # This function may return more than one database object for a single
37
+ # sequence id.
38
+ #
39
+ # e.g., hit.whichdb => [<Database: ...>, ...]
40
+ def whichdb
41
+ querydb.select { |db| db.include? id }
42
+ end
43
+
44
+ def report
45
+ query.report
46
+ end
47
+
48
+ def querydb
49
+ report.querydb
50
+ end
30
51
  end
31
52
  end
32
53
  end
@@ -6,11 +6,11 @@ module SequenceServer
6
6
  # HSP class is not used directly. Relevant HSP stats and formatting the
7
7
  # alignment changes with BLAST algorithm. We subclass HSP for each BLAST
8
8
  # algorithm.
9
- HSP = Struct.new(:number, :bit_score, :score, :evalue, :qstart, :qend,
9
+ HSP = Struct.new(:hit, :number, :bit_score, :score, :evalue, :qstart, :qend,
10
10
  :sstart, :send, :qframe, :sframe, :identity, :positives,
11
- :gaps, :len, :qseq, :sseq, :midline) do
12
- INTEGER_ARGS = [0, 2].concat((4..13).to_a)
13
- FLOAT_ARGS = [1, 3]
11
+ :gaps, :length, :qseq, :sseq, :midline) do
12
+ INTEGER_ARGS = [1, 3].concat((5..14).to_a)
13
+ FLOAT_ARGS = [2, 4]
14
14
 
15
15
  def initialize(*args)
16
16
  INTEGER_ARGS.each do |i|
@@ -24,8 +24,6 @@ module SequenceServer
24
24
  super
25
25
  end
26
26
 
27
- alias_method :length, :len
28
-
29
27
  # Returns a Hash of stats common to all BLAST algorithms. Subclasses must
30
28
  # update the returned Hash to add relevant stats of their own.
31
29
  #
@@ -4,14 +4,14 @@ module SequenceServer
4
4
  # Capture results per query of a BLAST search.
5
5
  # @member [String] number
6
6
  # @member [String] def
7
- # @member [Fixnum] len
7
+ # @member [Fixnum] length
8
8
  # @member [Array(Hit)] hits
9
- Query = Struct.new(:number, :def, :len, :hits) do
9
+ Query = Struct.new(:report, :number, :def, :length, :hits) do
10
10
  def initialize(*args)
11
- args[0] = args[0].to_i
12
- args[1] = "Query_#{args[0]}" if args[1] == 'No definition line'
13
- args[2] = args[2].to_i
14
- @id, *rest = args[1].split
11
+ args[1] = args[1].to_i
12
+ args[2] = "Query_#{args[1]}" if args[2] == 'No definition line'
13
+ args[3] = args[3].to_i
14
+ @id, *rest = args[2].split
15
15
  @title = rest.join(' ')
16
16
  super
17
17
  end
@@ -21,8 +21,6 @@ module SequenceServer
21
21
  end
22
22
 
23
23
  attr_reader :id, :title
24
-
25
- alias_method :length, :len
26
24
  end
27
25
  end
28
26
  end
@@ -1,99 +1,116 @@
1
1
  module SequenceServer
2
2
  module BLAST
3
- # Captures BLAST results from BLAST+'s XML output.
3
+ # Captures results of a BLAST search.
4
+ #
5
+ # A report is constructed from a search id. Search id is simply the
6
+ # basename of the temporary file that holds BLAST results in binary
7
+ # BLAST archive format.
8
+ #
9
+ # For a given search id, result is obtained in XML format using the
10
+ # Formatter class, parsed into a simple intermediate representation
11
+ # (Array of values and Arrays) and information extracted from the
12
+ # intermediate representation (ir).
4
13
  class Report
5
- include Links
6
-
7
- # Expects a File object and Database objects used to BLAST against.
8
- #
9
- # Parses the XML file into an intermediate representation (ir) and
10
- # constructs an object model from that.
11
- #
12
- # NOTE:
13
- # Databases param is optional for test suite.
14
- #
15
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
16
- def initialize(rfile, databases = nil)
17
- @archive_file = rfile
18
-
19
- xml_file = BLAST.format('report' => @archive_file,
20
- 'type' => 'full',
21
- 'format' => 'xml')
22
-
23
- ir = File.open(xml_file[:filepath]) do |f|
24
- node_to_array Ox.parse(f.read).root
25
- end
26
-
27
- @program = ir[0]
28
- @program_version = ir[1]
14
+ # Expects a BLAST search id and an Array of Database objects that were
15
+ # used to BLAST. The second argument being optional to aid test suite.
16
+ def initialize(search_id, databases = nil)
17
+ @search_id = search_id
29
18
  @querydb = Array databases
30
- @parameters = {
31
- :matrix => ir[7][0],
32
- :evalue => ir[7][1],
33
- :gapopen => ir[7][2],
34
- :gapextend => ir[7][3],
35
- :filters => ir[7][4]
36
- }
19
+ @queries = []
37
20
 
38
- ir[8].each_with_index do |n, i|
39
- @stats ||= n[5][0]
40
- @queries ||= []
41
- @queries.push(Query.new(n[0], n[2], n[3], []))
42
-
43
- # Ensure a hit object is received. No hits, returns a newline. Note
44
- # that checking to "\n" doesn't work since n[4] = ["\n"]
45
- if n[4] == ["\n"]
46
- @queries[i][:hits] = []
47
- else
48
- n[4].each_with_index do |hits, j|
49
- @queries[i][:hits].push(Hit.new(hits[0], hits[1], hits[2],
50
- hits[3], hits[4], []))
51
- hits[5].each do |hsp|
52
- hsp_klass = HSP.const_get program.upcase
53
- @queries[i][:hits][j][:hsps].push(hsp_klass.new(*hsp))
54
- end
55
- end
56
- @queries[i].sort_hits_by_evalue!
57
- end
58
- end
21
+ generate
59
22
  end
60
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
61
23
 
62
- attr_reader :archive_file
63
- attr_reader :program, :program_version
24
+ attr_reader :search_id, :querydb
64
25
 
65
26
  # :nodoc:
66
- # params are defaults provided by BLAST or user input to tweak the
67
- # result. stats are computed metrics provided by BLAST.
68
- #
69
- # BLAST+ doesn't list all input params (like word_size) in the XML
70
- # output. Only matrix, evalue, gapopen, gapextend, and filters.
27
+ # Attributes parsed out from XML output.
28
+ attr_reader :program, :program_version
71
29
  attr_reader :params, :stats
72
-
73
- attr_reader :querydb
74
-
75
30
  attr_reader :queries
76
31
 
77
- # Helper methods for pretty printing results
32
+ private
78
33
 
79
- def link_per_hit(sequence_id)
80
- links = Links.instance_methods.map { |m| send(m, sequence_id) }
34
+ # Generate report.
35
+ def generate
36
+ xml = Formatter.run(search_id, 'xml').file
37
+ ir = node_to_array(Ox.parse(xml.open.read).root)
38
+ extract_program_info ir
39
+ extract_params ir
40
+ extract_stats ir
41
+ extract_query ir
42
+ end
81
43
 
82
- # Sort links based on :order key (ascending)
83
- links.compact!.sort_by! { |link| link[:order] }
44
+ # Make program name and program name + version available via `program`
45
+ # and `program_version` attributes.
46
+ def extract_program_info(ir)
47
+ @program = ir[0]
48
+ @program_version = ir[1]
84
49
  end
85
50
 
86
- # Returns an array of database objects which contain the queried
87
- # sequence id.
88
- # NOTE: This function may return more than one database object for
89
- # a single sequence id.
51
+ # Make search params available via `params` attribute.
90
52
  #
91
- # e.g., which_blastdb('SI_2.2.23') => [<Database: ...>, ...]
92
- def which_blastdb(sequence_id)
93
- querydb.select { |db| db.include? sequence_id }
53
+ # Search params tweak the results. Like evalue cutoff or penalty to open
54
+ # a gap. BLAST+ doesn't list all input params in the XML output. Only
55
+ # matrix, evalue, gapopen, gapextend, and filters are available from XML
56
+ # output.
57
+ def extract_params(ir)
58
+ params = ir[7]
59
+ @params = {
60
+ :matrix => params[0],
61
+ :evalue => params[1],
62
+ :gapopen => params[2],
63
+ :gapextend => params[3],
64
+ :filters => params[4]
65
+ }
94
66
  end
95
67
 
96
- private
68
+ # Make search stats available via `stats` attribute.
69
+ #
70
+ # Search stats are computed metrics. Like total number of sequences or
71
+ # effective search space.
72
+ def extract_stats(ir)
73
+ stats = ir[8].first[5][0]
74
+ @stats = {
75
+ :nsequences => stats[0],
76
+ :ncharacters => stats[1],
77
+ :hsp_length => stats[2],
78
+ :search_space => stats[3],
79
+ :kappa => stats[4],
80
+ :labmda => stats[5],
81
+ :entropy => stats[6]
82
+ }
83
+ end
84
+
85
+ # Make results for each input query available via `queries` attribute.
86
+ def extract_query(ir)
87
+ ir[8].each do |n|
88
+ query = Query.new(self, n[0], n[2], n[3], [])
89
+ extract_hits(n[4], query)
90
+ query.sort_hits_by_evalue!
91
+ queries << query
92
+ end
93
+ end
94
+
95
+ # Create Hit objects from given ir and associate them with the given query.
96
+ def extract_hits(hits_ir, query)
97
+ return if hits_ir == ["\n"] # => No hits.
98
+ hits_ir.each do |n|
99
+ hit = Hit.new(query, n[0], n[1], n[3], n[2], n[4], [])
100
+ extract_hsps(n[5], hit)
101
+ query.hits << hit
102
+ end
103
+ end
104
+
105
+ # Create HSP objects from the given ir and associate them with the
106
+ # given hit.
107
+ def extract_hsps(hsp_ir, hit)
108
+ hsp_ir.each do |n|
109
+ hsp_klass = HSP.const_get program.upcase
110
+ hsp = hsp_klass.new(*[hit, *n])
111
+ hit.hsps << hsp
112
+ end
113
+ end
97
114
 
98
115
  PARSEABLE_AS_ARRAY = %w(Parameters BlastOutput_param Iteration_stat
99
116
  Statistics Iteration_hits BlastOutput_iterations
@@ -1,3 +1,4 @@
1
+ require 'yaml'
1
2
  require 'forwardable'
2
3
 
3
4
  # Define Config class.
@@ -23,7 +23,7 @@ module SequenceServer
23
23
 
24
24
  def to_s
25
25
  <<MSG
26
- Error in config file: #{ent}.
26
+ Error reading config file: #{ent}.
27
27
  #{err}
28
28
  MSG
29
29
  end
@@ -10,15 +10,7 @@ module SequenceServer
10
10
 
11
11
  NCBI_ID_PATTERN = /gi\|(\d+)\|/
12
12
 
13
- # Your custom method should have following pattern:
14
- #
15
- # Input
16
- # -----
17
- # sequence_id: Array of sequence ids
18
- #
19
- # Return
20
- # ------
21
- # The return value should be a Hash:
13
+ # Link generators return a Hash like below.
22
14
  #
23
15
  # {
24
16
  # # Required. Display title.
@@ -56,21 +48,21 @@ module SequenceServer
56
48
  # querydb:
57
49
  # Returns an array of databases that were used for BLASTing.
58
50
  #
59
- # which_blastdb:
51
+ # whichdb:
60
52
  # Returns the database from which the given hit came.
61
53
  #
62
54
  # e.g:
63
55
  #
64
- # hit_database = which_blastdb sequence_id
56
+ # hit_database = whichdb
65
57
  #
66
58
  # Examples:
67
59
  # ---------
68
60
  # See methods provided by default for an example implementation.
69
61
 
70
- def sequence_viewer(sequence_id)
71
- sequence_id = encode sequence_id
62
+ def sequence_viewer
63
+ accession = encode self.accession
72
64
  database_ids = encode querydb.map(&:id).join(' ')
73
- url = "get_sequence/?sequence_ids=#{sequence_id}" \
65
+ url = "get_sequence/?sequence_ids=#{accession}" \
74
66
  "&database_ids=#{database_ids}"
75
67
 
76
68
  {
@@ -82,28 +74,29 @@ module SequenceServer
82
74
  }
83
75
  end
84
76
 
85
- def fasta_download(sequence_id)
86
- sequence_id = encode sequence_id
77
+ def fasta_download
78
+ accession = encode self.accession
87
79
  database_ids = encode querydb.map(&:id).join(' ')
88
- url = "get_sequence/?sequence_ids=#{sequence_id}" \
80
+ url = "get_sequence/?sequence_ids=#{accession}" \
89
81
  "&database_ids=#{database_ids}&download=fasta"
90
82
 
91
83
  {
92
84
  :order => 1,
93
85
  :title => 'FASTA',
94
86
  :url => url,
87
+ :class => 'download',
95
88
  :icon => 'fa-download'
96
89
  }
97
90
  end
98
91
 
99
- def ncbi(sequence_id)
100
- return nil unless sequence_id.match(NCBI_ID_PATTERN)
92
+ def ncbi
93
+ return nil unless id.match(NCBI_ID_PATTERN)
101
94
  ncbi_id = Regexp.last_match[1]
102
95
  ncbi_id = encode ncbi_id
103
- url = "http://www.ncbi.nlm.nih.gov/#{querydb.first.typ}/#{ncbi_id}"
96
+ url = "http://www.ncbi.nlm.nih.gov/#{querydb.first.type}/#{ncbi_id}"
104
97
  {
105
98
  :order => 2,
106
- :title => 'View on NCBI',
99
+ :title => 'NCBI',
107
100
  :url => url,
108
101
  :icon => 'fa-external-link'
109
102
  }
@@ -0,0 +1,163 @@
1
+ require 'json'
2
+ require 'sinatra/base'
3
+
4
+ module SequenceServer
5
+ # Controller.
6
+ class Routes < Sinatra::Base
7
+ # See
8
+ # http://www.sinatrarb.com/configuration.html
9
+ configure do
10
+ # We don't need Rack::MethodOverride. Let's avoid the overhead.
11
+ disable :method_override
12
+
13
+ # Ensure exceptions never leak out of the app. Exceptions raised within
14
+ # the app must be handled by the app. We do this by attaching error
15
+ # blocks to exceptions we know how to handle and attaching to Exception
16
+ # as fallback.
17
+ disable :show_exceptions, :raise_errors
18
+
19
+ # Make it a policy to dump to 'rack.errors' any exception raised by the
20
+ # app so that error handlers don't have to do it themselves. But for it
21
+ # to always work, Exceptions defined by us should not respond to `code`
22
+ # or `http_status` methods. Error blocks must explicitly set http
23
+ # status, if needed, by calling the `status` method.
24
+ #
25
+ enable :dump_errors
26
+
27
+ # We don't want Sinatra to set up any loggers for us. We will use our own.
28
+ set :logging, nil
29
+
30
+ # Public, and views directory will be found here.
31
+ set :root, lambda { SequenceServer.root }
32
+ end
33
+
34
+ # See
35
+ # http://www.sinatrarb.com/intro.html#Mime%20Types
36
+ configure do
37
+ mime_type :fasta, 'text/fasta'
38
+ mime_type :xml, 'text/xml'
39
+ mime_type :tsv, 'text/tsv'
40
+ end
41
+
42
+ configure :production do
43
+ set :public_folder,
44
+ lambda { File.join SequenceServer.root, 'public', 'dist' }
45
+ end
46
+
47
+ helpers do
48
+ # Render an anchor element from the given Hash.
49
+ #
50
+ # See links.rb for example of a Hash object that will be rendered.
51
+ def a(link)
52
+ return unless link[:title] && link[:url]
53
+ target = absolute?(link[:url]) && '_blank' || '_self'
54
+ a = %(<a href="#{link[:url]}" class="#{link[:class]}" \
55
+ target="#{target}">)
56
+ a << %(<i class="fa #{link[:icon]}"></i> ) if link[:icon]
57
+ a << "#{link[:title]}</a>"
58
+ end
59
+
60
+ # Is the given URI absolute? (or relative?)
61
+ #
62
+ # Returns false if nil is passed.
63
+ def absolute?(uri)
64
+ uri && URI.parse(uri).absolute?
65
+ end
66
+
67
+ # Prettify given data.
68
+ def prettify(data)
69
+ return prettify_tuple(data) if tuple? data
70
+ return prettify_float(data) if data.is_a? Float
71
+ data
72
+ end
73
+
74
+ # Formats float as "a.bc" or "a.bc × 10^c". The latter if float is in
75
+ # scientific notation. Former otherwise.
76
+ def prettify_float(float)
77
+ float.to_s.sub(/(\d*\.\d*)e?([+-]\d*)?/) do
78
+ base = Regexp.last_match[1]
79
+ power = Regexp.last_match[2]
80
+ s = format '%.2f', base
81
+ s << " &times; 10<sup>#{power}</sup>" if power
82
+ s
83
+ end
84
+ end
85
+
86
+ # Formats an array of two elements as "first (last)".
87
+ def prettify_tuple(tuple)
88
+ "#{tuple.first} (#{tuple.last})"
89
+ end
90
+
91
+ # Is the given value a tuple? (array of length two).
92
+ def tuple?(data)
93
+ return true if data.is_a?(Array) && data.length == 2
94
+ end
95
+ end
96
+
97
+ # For any request that hits the app in development mode, log incoming
98
+ # params.
99
+ before do
100
+ logger.debug params
101
+ end
102
+
103
+ # Render the search form.
104
+ get '/' do
105
+ erb :search, :locals => { :databases => Database.group_by(&:type) }
106
+ end
107
+
108
+ # BLAST search!
109
+ post '/' do
110
+ erb :result, :locals => { :report => BLAST.run(params) }
111
+ end
112
+
113
+ # @params sequence_ids: whitespace separated list of sequence ids to
114
+ # retrieve
115
+ # @params database_ids: whitespace separated list of database ids to
116
+ # retrieve the sequence from.
117
+ # @params download: whether to return raw response or initiate file
118
+ # download
119
+ #
120
+ # Use whitespace to separate entries in sequence_ids (all other chars exist
121
+ # in identifiers) and database_ids (we don't allow whitespace in a
122
+ # database's name, so it's safe).
123
+ get '/get_sequence/' do
124
+ sequence_ids = params[:sequence_ids].split(/\s/)
125
+ database_ids = params[:database_ids].split(/\s/)
126
+
127
+ sequences = Sequence::Retriever.new(sequence_ids, database_ids,
128
+ params[:download])
129
+
130
+ send_file(sequences.file.path,
131
+ :type => sequences.mime,
132
+ :filename => sequences.filename) if params[:download]
133
+
134
+ sequences.to_json
135
+ end
136
+
137
+ # Download BLAST report in various formats.
138
+ get '/download/:search_id.:type' do
139
+ out = BLAST::Formatter.new(params[:search_id], params[:type])
140
+ send_file out.file.path, :filename => out.filename, :type => out.mime
141
+ end
142
+
143
+ # This error block will only ever be hit if the user gives us a funny
144
+ # sequence or incorrect advanced parameter. Well, we could hit this block
145
+ # if someone is playing around with our HTTP API too.
146
+ error BLAST::ArgumentError do
147
+ status 400
148
+ error = env['sinatra.error']
149
+ erb :'400', :locals => { :error => error }
150
+ end
151
+
152
+ # This will catch any unhandled error and some very special errors. Ideally
153
+ # we will never hit this block. If we do, there's a bug in SequenceServer
154
+ # or something really weird going on. If we hit this error block we show
155
+ # the stacktrace to the user requesting them to post the same to our Google
156
+ # Group.
157
+ error Exception, BLAST::RuntimeError do
158
+ status 500
159
+ error = env['sinatra.error']
160
+ erb :'500', :locals => { :error => error }
161
+ end
162
+ end
163
+ end