sequenceserver 1.0.0.pre.3 → 1.0.0.pre.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

@@ -2,22 +2,17 @@ module SequenceServer
2
2
  # Define BLAST::Hit.
3
3
  module BLAST
4
4
  # Hit Object to store all the hits per Query.
5
- # @member [Fixnum] number
6
- # @member [String] id
7
- # @member [String] def
8
- # @member [String] accession
9
- # @member [Fixnum] len
10
- # @member [HSP] hsp
11
- Hit = Struct.new(:number, :id, :title, :accession, :len, :hsps) do
5
+ Hit = Struct.new(:query, :number, :id, :accession, :title,
6
+ :length, :hsps) do
7
+ include Links
8
+
12
9
  def initialize(*args)
13
- args[0] = args[0].to_i
14
- args[2] = '' if args[2] == 'No definition line'
15
- args[4] = args[4].to_i
10
+ args[1] = args[1].to_i
11
+ args[4] = '' if args[4] == 'No definition line'
12
+ args[5] = args[5].to_i
16
13
  super
17
14
  end
18
15
 
19
- alias_method :length, :len
20
-
21
16
  # Hit evalue is the minimum evalue of all HSP(s).
22
17
  def evalue
23
18
  hsps.map(&:evalue).min
@@ -27,6 +22,32 @@ module SequenceServer
27
22
  def score
28
23
  hsps.map(&:bit_score).reduce(:+)
29
24
  end
25
+
26
+ def links
27
+ links = Links.instance_methods.map { |m| send m }
28
+ links.compact!
29
+ links.sort_by! { |link| link[:order] }
30
+ end
31
+
32
+ # Returns an array of database objects which contain the queried sequence
33
+ # id.
34
+ #
35
+ # NOTE:
36
+ # This function may return more than one database object for a single
37
+ # sequence id.
38
+ #
39
+ # e.g., hit.whichdb => [<Database: ...>, ...]
40
+ def whichdb
41
+ querydb.select { |db| db.include? id }
42
+ end
43
+
44
+ def report
45
+ query.report
46
+ end
47
+
48
+ def querydb
49
+ report.querydb
50
+ end
30
51
  end
31
52
  end
32
53
  end
@@ -6,11 +6,11 @@ module SequenceServer
6
6
  # HSP class is not used directly. Relevant HSP stats and formatting the
7
7
  # alignment changes with BLAST algorithm. We subclass HSP for each BLAST
8
8
  # algorithm.
9
- HSP = Struct.new(:number, :bit_score, :score, :evalue, :qstart, :qend,
9
+ HSP = Struct.new(:hit, :number, :bit_score, :score, :evalue, :qstart, :qend,
10
10
  :sstart, :send, :qframe, :sframe, :identity, :positives,
11
- :gaps, :len, :qseq, :sseq, :midline) do
12
- INTEGER_ARGS = [0, 2].concat((4..13).to_a)
13
- FLOAT_ARGS = [1, 3]
11
+ :gaps, :length, :qseq, :sseq, :midline) do
12
+ INTEGER_ARGS = [1, 3].concat((5..14).to_a)
13
+ FLOAT_ARGS = [2, 4]
14
14
 
15
15
  def initialize(*args)
16
16
  INTEGER_ARGS.each do |i|
@@ -24,8 +24,6 @@ module SequenceServer
24
24
  super
25
25
  end
26
26
 
27
- alias_method :length, :len
28
-
29
27
  # Returns a Hash of stats common to all BLAST algorithms. Subclasses must
30
28
  # update the returned Hash to add relevant stats of their own.
31
29
  #
@@ -4,14 +4,14 @@ module SequenceServer
4
4
  # Capture results per query of a BLAST search.
5
5
  # @member [Fixnum] number
6
6
  # @member [String] def
7
- # @member [Fixnum] len
7
+ # @member [Fixnum] length
8
8
  # @member [Array(Hit)] hits
9
- Query = Struct.new(:number, :def, :len, :hits) do
9
+ Query = Struct.new(:report, :number, :def, :length, :hits) do
10
10
  def initialize(*args)
11
- args[0] = args[0].to_i
12
- args[1] = "Query_#{args[0]}" if args[1] == 'No definition line'
13
- args[2] = args[2].to_i
14
- @id, *rest = args[1].split
11
+ args[1] = args[1].to_i
12
+ args[2] = "Query_#{args[1]}" if args[2] == 'No definition line'
13
+ args[3] = args[3].to_i
14
+ @id, *rest = args[2].split
15
15
  @title = rest.join(' ')
16
16
  super
17
17
  end
@@ -21,8 +21,6 @@ module SequenceServer
21
21
  end
22
22
 
23
23
  attr_reader :id, :title
24
-
25
- alias_method :length, :len
26
24
  end
27
25
  end
28
26
  end
@@ -1,99 +1,116 @@
1
1
  module SequenceServer
2
2
  module BLAST
3
- # Captures BLAST results from BLAST+'s XML output.
3
+ # Captures results of a BLAST search.
4
+ #
5
+ # A report is constructed from a search id. Search id is simply the
6
+ # basename of the temporary file that holds BLAST results in binary
7
+ # BLAST archive format.
8
+ #
9
+ # For a given search id, result is obtained in XML format using the
10
+ # Formatter class, parsed into a simple intermediate representation
11
+ # (Array of values and Arrays) and information extracted from the
12
+ # intermediate representation (ir).
4
13
  class Report
5
- include Links
6
-
7
- # Expects a File object and Database objects used to BLAST against.
8
- #
9
- # Parses the XML file into an intermediate representation (ir) and
10
- # constructs an object model from that.
11
- #
12
- # NOTE:
13
- # Databases param is optional for test suite.
14
- #
15
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
16
- def initialize(rfile, databases = nil)
17
- @archive_file = rfile
18
-
19
- xml_file = BLAST.format('report' => @archive_file,
20
- 'type' => 'full',
21
- 'format' => 'xml')
22
-
23
- ir = File.open(xml_file[:filepath]) do |f|
24
- node_to_array Ox.parse(f.read).root
25
- end
26
-
27
- @program = ir[0]
28
- @program_version = ir[1]
14
+ # Expects a BLAST search id and an Array of Database objects that were
15
+ # used to BLAST. The second argument is optional, to aid the test suite.
16
+ def initialize(search_id, databases = nil)
17
+ @search_id = search_id
29
18
  @querydb = Array databases
30
- @parameters = {
31
- :matrix => ir[7][0],
32
- :evalue => ir[7][1],
33
- :gapopen => ir[7][2],
34
- :gapextend => ir[7][3],
35
- :filters => ir[7][4]
36
- }
19
+ @queries = []
37
20
 
38
- ir[8].each_with_index do |n, i|
39
- @stats ||= n[5][0]
40
- @queries ||= []
41
- @queries.push(Query.new(n[0], n[2], n[3], []))
42
-
43
- # Ensure a hit object is received. No hits, returns a newline. Note
44
- # that checking to "\n" doesn't work since n[4] = ["\n"]
45
- if n[4] == ["\n"]
46
- @queries[i][:hits] = []
47
- else
48
- n[4].each_with_index do |hits, j|
49
- @queries[i][:hits].push(Hit.new(hits[0], hits[1], hits[2],
50
- hits[3], hits[4], []))
51
- hits[5].each do |hsp|
52
- hsp_klass = HSP.const_get program.upcase
53
- @queries[i][:hits][j][:hsps].push(hsp_klass.new(*hsp))
54
- end
55
- end
56
- @queries[i].sort_hits_by_evalue!
57
- end
58
- end
21
+ generate
59
22
  end
60
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
61
23
 
62
- attr_reader :archive_file
63
- attr_reader :program, :program_version
24
+ attr_reader :search_id, :querydb
64
25
 
65
26
  # :nodoc:
66
- # params are defaults provided by BLAST or user input to tweak the
67
- # result. stats are computed metrics provided by BLAST.
68
- #
69
- # BLAST+ doesn't list all input params (like word_size) in the XML
70
- # output. Only matrix, evalue, gapopen, gapextend, and filters.
27
+ # Attributes parsed out from XML output.
28
+ attr_reader :program, :program_version
71
29
  attr_reader :params, :stats
72
-
73
- attr_reader :querydb
74
-
75
30
  attr_reader :queries
76
31
 
77
- # Helper methods for pretty printing results
32
+ private
78
33
 
79
- def link_per_hit(sequence_id)
80
- links = Links.instance_methods.map { |m| send(m, sequence_id) }
34
+ # Generate report.
35
+ def generate
36
+ xml = Formatter.run(search_id, 'xml').file
37
+ ir = node_to_array(Ox.parse(xml.open.read).root)
38
+ extract_program_info ir
39
+ extract_params ir
40
+ extract_stats ir
41
+ extract_query ir
42
+ end
81
43
 
82
- # Sort links based on :order key (ascending)
83
- links.compact!.sort_by! { |link| link[:order] }
44
+ # Make program name and program name + version available via `program`
45
+ # and `program_version` attributes.
46
+ def extract_program_info(ir)
47
+ @program = ir[0]
48
+ @program_version = ir[1]
84
49
  end
85
50
 
86
- # Returns an array of database objects which contain the queried
87
- # sequence id.
88
- # NOTE: This function may return more than one database object for
89
- # a single sequence id.
51
+ # Make search params available via `params` attribute.
90
52
  #
91
- # e.g., which_blastdb('SI_2.2.23') => [<Database: ...>, ...]
92
- def which_blastdb(sequence_id)
93
- querydb.select { |db| db.include? sequence_id }
53
+ # Search params tweak the results. Like evalue cutoff or penalty to open
54
+ # a gap. BLAST+ doesn't list all input params in the XML output. Only
55
+ # matrix, evalue, gapopen, gapextend, and filters are available from XML
56
+ # output.
57
+ def extract_params(ir)
58
+ params = ir[7]
59
+ @params = {
60
+ :matrix => params[0],
61
+ :evalue => params[1],
62
+ :gapopen => params[2],
63
+ :gapextend => params[3],
64
+ :filters => params[4]
65
+ }
94
66
  end
95
67
 
96
- private
68
+ # Make search stats available via `stats` attribute.
69
+ #
70
+ # Search stats are computed metrics. Like total number of sequences or
71
+ # effective search space.
72
+ def extract_stats(ir)
73
+ stats = ir[8].first[5][0]
74
+ @stats = {
75
+ :nsequences => stats[0],
76
+ :ncharacters => stats[1],
77
+ :hsp_length => stats[2],
78
+ :search_space => stats[3],
79
+ :kappa => stats[4],
80
+ :labmda => stats[5],
81
+ :entropy => stats[6]
82
+ }
83
+ end
84
+
85
+ # Make results for each input query available via `queries` attribute.
86
+ def extract_query(ir)
87
+ ir[8].each do |n|
88
+ query = Query.new(self, n[0], n[2], n[3], [])
89
+ extract_hits(n[4], query)
90
+ query.sort_hits_by_evalue!
91
+ queries << query
92
+ end
93
+ end
94
+
95
+ # Create Hit objects from the given ir and associate them with the given query.
96
+ def extract_hits(hits_ir, query)
97
+ return if hits_ir == ["\n"] # => No hits.
98
+ hits_ir.each do |n|
99
+ hit = Hit.new(query, n[0], n[1], n[3], n[2], n[4], [])
100
+ extract_hsps(n[5], hit)
101
+ query.hits << hit
102
+ end
103
+ end
104
+
105
+ # Create HSP objects from the given ir and associate them with the given
106
+ # hit.
107
+ def extract_hsps(hsp_ir, hit)
108
+ hsp_ir.each do |n|
109
+ hsp_klass = HSP.const_get program.upcase
110
+ hsp = hsp_klass.new(*[hit, *n])
111
+ hit.hsps << hsp
112
+ end
113
+ end
97
114
 
98
115
  PARSEABLE_AS_ARRAY = %w(Parameters BlastOutput_param Iteration_stat
99
116
  Statistics Iteration_hits BlastOutput_iterations
@@ -1,3 +1,4 @@
1
+ require 'yaml'
1
2
  require 'forwardable'
2
3
 
3
4
  # Define Config class.
@@ -23,7 +23,7 @@ module SequenceServer
23
23
 
24
24
  def to_s
25
25
  <<MSG
26
- Error in config file: #{ent}.
26
+ Error reading config file: #{ent}.
27
27
  #{err}
28
28
  MSG
29
29
  end
@@ -10,15 +10,7 @@ module SequenceServer
10
10
 
11
11
  NCBI_ID_PATTERN = /gi\|(\d+)\|/
12
12
 
13
- # Your custom method should have following pattern:
14
- #
15
- # Input
16
- # -----
17
- # sequence_id: Array of sequence ids
18
- #
19
- # Return
20
- # ------
21
- # The return value should be a Hash:
13
+ # Link generators return a Hash like below.
22
14
  #
23
15
  # {
24
16
  # # Required. Display title.
@@ -56,21 +48,21 @@ module SequenceServer
56
48
  # querydb:
57
49
  # Returns an array of databases that were used for BLASTing.
58
50
  #
59
- # which_blastdb:
51
+ # whichdb:
60
52
  # Returns the database from which the given hit came.
61
53
  #
62
54
  # e.g:
63
55
  #
64
- # hit_database = which_blastdb sequence_id
56
+ # hit_database = whichdb
65
57
  #
66
58
  # Examples:
67
59
  # ---------
68
60
  # See methods provided by default for an example implementation.
69
61
 
70
- def sequence_viewer(sequence_id)
71
- sequence_id = encode sequence_id
62
+ def sequence_viewer
63
+ accession = encode self.accession
72
64
  database_ids = encode querydb.map(&:id).join(' ')
73
- url = "get_sequence/?sequence_ids=#{sequence_id}" \
65
+ url = "get_sequence/?sequence_ids=#{accession}" \
74
66
  "&database_ids=#{database_ids}"
75
67
 
76
68
  {
@@ -82,28 +74,29 @@ module SequenceServer
82
74
  }
83
75
  end
84
76
 
85
- def fasta_download(sequence_id)
86
- sequence_id = encode sequence_id
77
+ def fasta_download
78
+ accession = encode self.accession
87
79
  database_ids = encode querydb.map(&:id).join(' ')
88
- url = "get_sequence/?sequence_ids=#{sequence_id}" \
80
+ url = "get_sequence/?sequence_ids=#{accession}" \
89
81
  "&database_ids=#{database_ids}&download=fasta"
90
82
 
91
83
  {
92
84
  :order => 1,
93
85
  :title => 'FASTA',
94
86
  :url => url,
87
+ :class => 'download',
95
88
  :icon => 'fa-download'
96
89
  }
97
90
  end
98
91
 
99
- def ncbi(sequence_id)
100
- return nil unless sequence_id.match(NCBI_ID_PATTERN)
92
+ def ncbi
93
+ return nil unless id.match(NCBI_ID_PATTERN)
101
94
  ncbi_id = Regexp.last_match[1]
102
95
  ncbi_id = encode ncbi_id
103
- url = "http://www.ncbi.nlm.nih.gov/#{querydb.first.typ}/#{ncbi_id}"
96
+ url = "http://www.ncbi.nlm.nih.gov/#{querydb.first.type}/#{ncbi_id}"
104
97
  {
105
98
  :order => 2,
106
- :title => 'View on NCBI',
99
+ :title => 'NCBI',
107
100
  :url => url,
108
101
  :icon => 'fa-external-link'
109
102
  }
@@ -0,0 +1,163 @@
1
+ require 'json'
2
+ require 'sinatra/base'
3
+
4
+ module SequenceServer
5
+ # Controller.
6
+ class Routes < Sinatra::Base
7
+ # See
8
+ # http://www.sinatrarb.com/configuration.html
9
+ configure do
10
+ # We don't need Rack::MethodOverride. Let's avoid the overhead.
11
+ disable :method_override
12
+
13
+ # Ensure exceptions never leak out of the app. Exceptions raised within
14
+ # the app must be handled by the app. We do this by attaching error
15
+ # blocks to exceptions we know how to handle and attaching to Exception
16
+ # as fallback.
17
+ disable :show_exceptions, :raise_errors
18
+
19
+ # Make it a policy to dump to 'rack.errors' any exception raised by the
20
+ # app so that error handlers don't have to do it themselves. But for it
21
+ # to always work, Exceptions defined by us should not respond to `code`
22
+ # or `http_status` methods. Error blocks must explicitly set http
23
+ # status, if needed, by calling the `status`
24
+ # method.
25
+ enable :dump_errors
26
+
27
+ # We don't want Sinatra to set up any loggers for us. We will use our own.
28
+ set :logging, nil
29
+
30
+ # Public and views directories will be found here.
31
+ set :root, lambda { SequenceServer.root }
32
+ end
33
+
34
+ # See
35
+ # http://www.sinatrarb.com/intro.html#Mime%20Types
36
+ configure do
37
+ mime_type :fasta, 'text/fasta'
38
+ mime_type :xml, 'text/xml'
39
+ mime_type :tsv, 'text/tsv'
40
+ end
41
+
42
+ configure :production do
43
+ set :public_folder,
44
+ lambda { File.join SequenceServer.root, 'public', 'dist' }
45
+ end
46
+
47
+ helpers do
48
+ # Render an anchor element from the given Hash.
49
+ #
50
+ # See links.rb for example of a Hash object that will be rendered.
51
+ def a(link)
52
+ return unless link[:title] && link[:url]
53
+ target = absolute?(link[:url]) && '_blank' || '_self'
54
+ a = %(<a href="#{link[:url]}" class="#{link[:class]}" \
55
+ target="#{target}">)
56
+ a << %(<i class="fa #{link[:icon]}"></i> ) if link[:icon]
57
+ a << "#{link[:title]}</a>"
58
+ end
59
+
60
+ # Is the given URI absolute? (or relative?)
61
+ #
62
+ # Returns false if nil is passed.
63
+ def absolute?(uri)
64
+ uri && URI.parse(uri).absolute?
65
+ end
66
+
67
+ # Prettify given data.
68
+ def prettify(data)
69
+ return prettify_tuple(data) if tuple? data
70
+ return prettify_float(data) if data.is_a? Float
71
+ data
72
+ end
73
+
74
+ # Formats float as "a.bc" or "a.bc x 10^c". The latter if float is in
75
+ # scientific notation. Former otherwise.
76
+ def prettify_float(float)
77
+ float.to_s.sub(/(\d*\.\d*)e?([+-]\d*)?/) do
78
+ base = Regexp.last_match[1]
79
+ power = Regexp.last_match[2]
80
+ s = format '%.2f', base
81
+ s << " &times; 10<sup>#{power}</sup>" if power
82
+ s
83
+ end
84
+ end
85
+
86
+ # Formats an array of two elements as "first (last)".
87
+ def prettify_tuple(tuple)
88
+ "#{tuple.first} (#{tuple.last})"
89
+ end
90
+
91
+ # Is the given value a tuple? (array of length two).
92
+ def tuple?(data)
93
+ return true if data.is_a?(Array) && data.length == 2
94
+ end
95
+ end
96
+
97
+ # For any request that hits the app in development mode, log incoming
98
+ # params.
99
+ before do
100
+ logger.debug params
101
+ end
102
+
103
+ # Render the search form.
104
+ get '/' do
105
+ erb :search, :locals => { :databases => Database.group_by(&:type) }
106
+ end
107
+
108
+ # BLAST search!
109
+ post '/' do
110
+ erb :result, :locals => { :report => BLAST.run(params) }
111
+ end
112
+
113
+ # @params sequence_ids: whitespace separated list of sequence ids to
114
+ # retrieve
115
+ # @params database_ids: whitespace separated list of database ids to
116
+ # retrieve the sequence from.
117
+ # @params download: whether to return raw response or initiate file
118
+ # download
119
+ #
120
+ # Use whitespace to separate entries in sequence_ids (all other chars exist
121
+ # in identifiers) and database_ids (we don't allow whitespace in a
122
+ # database's name, so it's safe).
123
+ get '/get_sequence/' do
124
+ sequence_ids = params[:sequence_ids].split(/\s/)
125
+ database_ids = params[:database_ids].split(/\s/)
126
+
127
+ sequences = Sequence::Retriever.new(sequence_ids, database_ids,
128
+ params[:download])
129
+
130
+ send_file(sequences.file.path,
131
+ :type => sequences.mime,
132
+ :filename => sequences.filename) if params[:download]
133
+
134
+ sequences.to_json
135
+ end
136
+
137
+ # Download BLAST report in various formats.
138
+ get '/download/:search_id.:type' do
139
+ out = BLAST::Formatter.new(params[:search_id], params[:type])
140
+ send_file out.file.path, :filename => out.filename, :type => out.mime
141
+ end
142
+
143
+ # This error block will only ever be hit if the user gives us a funny
144
+ # sequence or incorrect advanced parameter. Well, we could hit this block
145
+ # if someone is playing around with our HTTP API too.
146
+ error BLAST::ArgumentError do
147
+ status 400
148
+ error = env['sinatra.error']
149
+ erb :'400', :locals => { :error => error }
150
+ end
151
+
152
+ # This will catch any unhandled error and some very special errors. Ideally
153
+ # we will never hit this block. If we do, there's a bug in SequenceServer
154
+ # or something really weird going on. If we hit this error block we show
155
+ # the stacktrace to the user requesting them to post the same to our Google
156
+ # Group.
157
+ error Exception, BLAST::RuntimeError do
158
+ status 500
159
+ error = env['sinatra.error']
160
+ erb :'500', :locals => { :error => error }
161
+ end
162
+ end
163
+ end