sequenceserver 0.8.9 → 1.0.0.pre.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sequenceserver might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/{README.txt → README.md} +2 -0
- data/bin/sequenceserver +255 -55
- data/config.ru +2 -4
- data/lib/sequenceserver.rb +293 -447
- data/lib/sequenceserver/blast.rb +464 -64
- data/lib/sequenceserver/database.rb +185 -19
- data/lib/sequenceserver/links.rb +114 -0
- data/lib/sequenceserver/logger.rb +27 -0
- data/lib/sequenceserver/sequence.rb +141 -0
- data/public/css/bootstrap.min.css +8 -413
- data/public/css/custom.css +363 -122
- data/public/css/font-awesome.min.css +4 -0
- data/public/fonts/FontAwesome.otf +0 -0
- data/public/fonts/fontawesome-webfont.eot +0 -0
- data/public/fonts/fontawesome-webfont.svg +565 -0
- data/public/fonts/fontawesome-webfont.ttf +0 -0
- data/public/fonts/fontawesome-webfont.woff +0 -0
- data/public/fonts/fontawesome-webfont.woff2 +0 -0
- data/public/js/bootstrap.min.js +11 -0
- data/public/js/d3.v3.min.js +5 -0
- data/public/js/html5shiv.min.js +4 -0
- data/public/js/jquery.scrollspy.js +74 -0
- data/public/js/jquery.t.js +353 -0
- data/public/js/sequence.js +2419 -0
- data/public/js/sequenceserver.blast.js +29 -30
- data/public/js/sequenceserver.js +544 -120
- data/public/js/underscore.min.js +6 -0
- data/public/js/webshims/polyfiller.js +1 -0
- data/public/js/webshims/shims/FlashCanvas/canvas2png.js +1 -0
- data/public/js/webshims/shims/FlashCanvas/flashcanvas.js +1 -0
- data/public/js/webshims/shims/FlashCanvas/flashcanvas.swf +0 -0
- data/public/js/webshims/shims/FlashCanvasPro/canvas2png.js +1 -0
- data/public/js/webshims/shims/FlashCanvasPro/flash10canvas.swf +0 -0
- data/public/js/webshims/shims/FlashCanvasPro/flash9canvas.swf +0 -0
- data/public/js/webshims/shims/FlashCanvasPro/flashcanvas.js +1 -0
- data/public/js/webshims/shims/canvas-blob.js +1 -0
- data/public/js/webshims/shims/color-picker.js +2 -0
- data/public/js/webshims/shims/combos/1.js +6 -0
- data/public/js/webshims/shims/combos/10.js +2 -0
- data/public/js/webshims/shims/combos/11.js +2 -0
- data/public/js/webshims/shims/combos/12.js +6 -0
- data/public/js/webshims/shims/combos/13.js +1 -0
- data/public/js/webshims/shims/combos/14.js +1 -0
- data/public/js/webshims/shims/combos/15.js +2 -0
- data/public/js/webshims/shims/combos/16.js +7 -0
- data/public/js/webshims/shims/combos/17.js +2 -0
- data/public/js/webshims/shims/combos/18.js +3 -0
- data/public/js/webshims/shims/combos/2.js +7 -0
- data/public/js/webshims/shims/combos/21.js +2 -0
- data/public/js/webshims/shims/combos/22.js +1 -0
- data/public/js/webshims/shims/combos/23.js +6 -0
- data/public/js/webshims/shims/combos/25.js +2 -0
- data/public/js/webshims/shims/combos/27.js +1 -0
- data/public/js/webshims/shims/combos/28.js +1 -0
- data/public/js/webshims/shims/combos/29.js +1 -0
- data/public/js/webshims/shims/combos/3.js +1 -0
- data/public/js/webshims/shims/combos/30.js +2 -0
- data/public/js/webshims/shims/combos/31.js +1 -0
- data/public/js/webshims/shims/combos/33.js +1 -0
- data/public/js/webshims/shims/combos/34.js +1 -0
- data/public/js/webshims/shims/combos/4.js +1 -0
- data/public/js/webshims/shims/combos/5.js +2 -0
- data/public/js/webshims/shims/combos/6.js +2 -0
- data/public/js/webshims/shims/combos/7.js +7 -0
- data/public/js/webshims/shims/combos/8.js +7 -0
- data/public/js/webshims/shims/combos/9.js +2 -0
- data/public/js/webshims/shims/combos/97.js +1 -0
- data/public/js/webshims/shims/combos/98.js +1 -0
- data/public/js/webshims/shims/combos/99.js +1 -0
- data/public/js/webshims/shims/details.js +1 -0
- data/public/js/webshims/shims/dom-extend.js +1 -0
- data/public/js/webshims/shims/es5.js +1 -0
- data/public/js/webshims/shims/es6.js +1 -0
- data/public/js/webshims/shims/excanvas.js +1 -0
- data/public/js/webshims/shims/filereader-xhr.js +1 -0
- data/public/js/webshims/shims/form-combat.js +1 -0
- data/public/js/webshims/shims/form-core.js +1 -0
- data/public/js/webshims/shims/form-datalist-lazy.js +1 -0
- data/public/js/webshims/shims/form-datalist.js +1 -0
- data/public/js/webshims/shims/form-fixrangechange.js +1 -0
- data/public/js/webshims/shims/form-inputmode.js +1 -0
- data/public/js/webshims/shims/form-message.js +1 -0
- data/public/js/webshims/shims/form-native-extend.js +1 -0
- data/public/js/webshims/shims/form-number-date-api.js +1 -0
- data/public/js/webshims/shims/form-number-date-ui.js +1 -0
- data/public/js/webshims/shims/form-shim-extend.js +1 -0
- data/public/js/webshims/shims/form-shim-extend2.js +1 -0
- data/public/js/webshims/shims/form-validation.js +1 -0
- data/public/js/webshims/shims/form-validators.js +1 -0
- data/public/js/webshims/shims/forms-picker.js +1 -0
- data/public/js/webshims/shims/geolocation.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-ar.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-ch-CN.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-cs.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-de.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-el.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-en.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-es.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-fa.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-fr.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-he.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-hi.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-hu.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-it.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-ja.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-lt.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-nl.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-pl.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-pt-BR.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-pt-PT.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-pt.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-ru.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-sv.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-zh-CN.js +1 -0
- data/public/js/webshims/shims/i18n/formcfg-zh-TW.js +1 -0
- data/public/js/webshims/shims/jme/alternate-media.js +1 -0
- data/public/js/webshims/shims/jme/base.js +1 -0
- data/public/js/webshims/shims/jme/controls.css +1 -0
- data/public/js/webshims/shims/jme/jme.eot +0 -0
- data/public/js/webshims/shims/jme/jme.svg +36 -0
- data/public/js/webshims/shims/jme/jme.ttf +0 -0
- data/public/js/webshims/shims/jme/jme.woff +0 -0
- data/public/js/webshims/shims/jme/mediacontrols-lazy.js +1 -0
- data/public/js/webshims/shims/jme/mediacontrols.js +1 -0
- data/public/js/webshims/shims/jme/playlist.js +1 -0
- data/public/js/webshims/shims/jpicker/images/AlphaBar.png +0 -0
- data/public/js/webshims/shims/jpicker/images/Bars.png +0 -0
- data/public/js/webshims/shims/jpicker/images/Maps.png +0 -0
- data/public/js/webshims/shims/jpicker/images/NoColor.png +0 -0
- data/public/js/webshims/shims/jpicker/images/bar-opacity.png +0 -0
- data/public/js/webshims/shims/jpicker/images/map-opacity.png +0 -0
- data/public/js/webshims/shims/jpicker/images/mappoint.gif +0 -0
- data/public/js/webshims/shims/jpicker/images/picker.gif +0 -0
- data/public/js/webshims/shims/jpicker/images/preview-opacity.png +0 -0
- data/public/js/webshims/shims/jpicker/images/rangearrows.gif +0 -0
- data/public/js/webshims/shims/jpicker/jpicker.css +1 -0
- data/public/js/webshims/shims/matchMedia.js +3 -0
- data/public/js/webshims/shims/mediacapture-picker.js +1 -0
- data/public/js/webshims/shims/mediacapture.js +1 -0
- data/public/js/webshims/shims/mediaelement-core.js +1 -0
- data/public/js/webshims/shims/mediaelement-debug.js +1 -0
- data/public/js/webshims/shims/mediaelement-jaris.js +1 -0
- data/public/js/webshims/shims/mediaelement-native-fix.js +1 -0
- data/public/js/webshims/shims/mediaelement-yt.js +1 -0
- data/public/js/webshims/shims/moxie/flash/Moxie.cdn.swf +0 -0
- data/public/js/webshims/shims/moxie/flash/Moxie.min.swf +0 -0
- data/public/js/webshims/shims/moxie/js/moxie-html4.js +3 -0
- data/public/js/webshims/shims/moxie/js/moxie-swf.js +2 -0
- data/public/js/webshims/shims/picture.js +1 -0
- data/public/js/webshims/shims/plugins/jquery.ui.position.js +11 -0
- data/public/js/webshims/shims/range-ui.js +1 -0
- data/public/js/webshims/shims/sizzle.js +11 -0
- data/public/js/webshims/shims/sticky.js +1 -0
- data/public/js/webshims/shims/styles/color-picker.png +0 -0
- data/public/js/webshims/shims/styles/forms-ext.css +1 -0
- data/public/js/webshims/shims/styles/forms-picker.css +1 -0
- data/public/js/webshims/shims/styles/progress.gif +0 -0
- data/public/js/webshims/shims/styles/progress.png +0 -0
- data/public/js/webshims/shims/styles/shim-ext.css +1 -0
- data/public/js/webshims/shims/styles/shim.css +1 -0
- data/public/js/webshims/shims/styles/transparent.png +0 -0
- data/public/js/webshims/shims/styles/widget.eot +0 -0
- data/public/js/webshims/shims/styles/widget.svg +12 -0
- data/public/js/webshims/shims/styles/widget.ttf +0 -0
- data/public/js/webshims/shims/styles/widget.woff +0 -0
- data/public/js/webshims/shims/swf/JarisFLVPlayer.swf +0 -0
- data/public/js/webshims/shims/swfmini-embed.js +1 -0
- data/public/js/webshims/shims/swfmini.js +6 -0
- data/public/js/webshims/shims/track-ui.js +1 -0
- data/public/js/webshims/shims/track.js +1 -0
- data/public/js/webshims/shims/url.js +1 -0
- data/public/js/webshims/shims/usermedia-core.js +1 -0
- data/public/js/webshims/shims/usermedia-shim.js +1 -0
- data/sequenceserver.gemspec +16 -13
- data/views/400.erb +28 -0
- data/views/500.erb +35 -19
- data/views/_options.erb +6 -15
- data/views/result.erb +218 -0
- data/views/search.erb +354 -151
- metadata +254 -62
- data/example.config.yml +0 -39
- data/lib/sequenceserver/customisation.rb +0 -60
- data/lib/sequenceserver/database_formatter.rb +0 -190
- data/lib/sequenceserver/helpers.rb +0 -136
- data/lib/sequenceserver/sequencehelpers.rb +0 -93
- data/lib/sequenceserver/sinatralikeloggerformatter.rb +0 -12
- data/lib/sequenceserver/version.rb +0 -9
- data/public/css/beige.css.css +0 -254
- data/public/css/bootstrap.dropdown.css +0 -29
- data/public/css/bootstrap.icons.css +0 -155
- data/public/css/bootstrap.modal.css +0 -28
- data/public/js/bootstrap.dropdown.js +0 -92
- data/public/js/bootstrap.modal.js +0 -7
- data/public/js/bootstrap.transition.js +0 -7
- data/public/js/jquery-scrollspy.js +0 -98
- data/public/js/jquery.activity.js +0 -10
- data/public/js/jquery.enablePlaceholder.min.js +0 -10
- data/public/js/store.min.js +0 -2
- data/public/sequence.html +0 -28
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta +0 -5486
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nhr +0 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nin +0 -0
- data/tests/database/nucleotide/Sinvicta2-2-3.cdna.subset.fasta.nsq +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta +0 -6449
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.phr +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.pin +0 -0
- data/tests/database/protein/Sinvicta2-2-3.prot.subset.fasta.psq +0 -0
- data/tests/run +0 -26
- data/tests/test_sequencehelpers.rb +0 -77
- data/tests/test_sequenceserver_blast.rb +0 -60
- data/tests/test_ui.rb +0 -104
- data/tests/ui.specs.todo +0 -10
@@ -1,29 +1,195 @@
|
|
1
|
+
require 'find'
|
1
2
|
require 'digest/md5'
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
require 'sequenceserver/sequence'
|
2
6
|
|
3
7
|
module SequenceServer

  # Captures a directory containing FASTA files and BLAST databases.
  #
  # Formatting a FASTA for use with BLAST+ will create 3 or 6 files,
  # collectively referred to as a BLAST database.
  #
  # It is important that formatted BLAST database files have the same dirname and
  # basename as the source FASTA for SequenceServer to be able to tell formatted
  # FASTA from unformatted. And that FASTA files be formatted with `parse_seqids`
  # option of `makeblastdb` for sequence retrieval to work.
  #
  # SequenceServer will always place BLAST database files alongside input FASTA,
  # and use `parse_seqids` option of `makeblastdb` to format databases.
  #
  # The class itself doubles as the registry of known databases (see the
  # singleton methods below); each instance describes one database.
  class Database < Struct.new(:name, :title, :type, :nsequences, :ncharacters, :updated_on)

    class << self

      include Enumerable

      extend Forwardable

      def_delegators SequenceServer, :config, :logger

      # Registry of known databases, keyed by Database#id.
      def collection
        @collection ||= {}
      end

      private :collection

      # Registers the given database, replacing any entry with the same id.
      def <<(database)
        collection[database.id] = database
      end

      # Returns an Array with the database registered under each given id
      # (a single id or an Array of ids). Unknown ids yield `nil` entries.
      def [](ids)
        ids = Array ids
        collection.values_at(*ids)
      end

      # Ids of all registered databases.
      def ids
        collection.keys
      end

      # All registered databases.
      def all
        collection.values
      end

      # Iterates over all registered databases (backs Enumerable).
      def each(&block)
        all.each(&block)
      end

      # Returns true if a database whose name is `path` has been registered.
      # Ids are the MD5 hex digest of the database name (see #initialize).
      def include?(path)
        collection.include? Digest::MD5.hexdigest path
      end

      def group_by(&block)
        all.group_by(&block)
      end

      # Intended to be used only for testing.
      def first
        all.first
      end

      # Recursively scan `database_dir` for blast databases.
      #
      # `blastdbcmd` is asked for one tab separated record per database, in
      # the same order as the Struct members. Fields must be split on tabs,
      # not spaces: titles and dates routinely contain spaces.
      def scan_databases_dir
        database_dir = config[:database_dir]
        # Array form runs the program directly, without a shell, so that
        # spaces or shell metacharacters in `database_dir` are harmless.
        command = ['blastdbcmd', '-recursive', '-list', database_dir.to_s,
                   '-list_outfmt', "%f\t%t\t%p\t%n\t%l\t%d"]
        list = IO.popen(command, :err => File::NULL, &:read)
        list.each_line do |line|
          fields = line.chomp.split("\t", -1)
          # Anything that is not a complete record (e.g. a diagnostic message)
          # can't describe a database.
          next unless fields.length == members.length
          next if multipart_database_name?(fields.first)
          self << Database.new(*fields)
        end
      end

      # Recursively scan `database_dir` for un-formatted FASTA and format them
      # for use with BLAST+.
      def make_blast_databases
        unformatted_fastas.each do |file, sequence_type|
          make_blast_database(file, sequence_type)
        end
      end

      # Returns an Array of FASTA files that may require formatting, and the type
      # of sequence contained in each FASTA.
      #
      # > unformatted_fastas
      # => [['/foo/bar.fasta', :nucleotide], ...]
      def unformatted_fastas
        list = []
        database_dir = config[:database_dir]
        Find.find database_dir do |file|
          next if File.directory?(file)
          next if Database.include? file
          next unless probably_fasta? file

          sequence_type = guess_sequence_type_in_fasta file
          if [:protein, :nucleotide].include?(sequence_type)
            list << [file, sequence_type]
          end
        end
        list
      end

      # Create BLAST database, given FASTA file and sequence type in FASTA file.
      #
      # Interactive: asks for confirmation and a database title on STDIN.
      # Returns `nil` if the user declines, output of `makeblastdb` otherwise.
      def make_blast_database(file, type)
        puts "FASTA file: #{file}"
        puts "FASTA type: #{type}"

        print "Proceed? [y/n] (Default: y): "
        response = STDIN.gets.to_s.strip
        return if response.match(/n/i)

        default_title = make_db_title(File.basename(file))
        print "Enter a database title or will use '#{default_title}': "
        # Strip, so the trailing newline from the terminal doesn't end up in
        # the database title.
        title = STDIN.gets.to_s.strip
        title = default_title if title.empty?

        # No shell involved: file names and titles are passed verbatim.
        command = ['makeblastdb', '-parse_seqids', '-hash_index',
                   '-in', file.to_s, '-dbtype', type.to_s.slice(0, 4),
                   '-title', title]
        IO.popen(command, &:read)
      end

      # Returns true if the database name appears to be a multi-part database name.
      #
      # e.g.
      # /home/ben/pd.ben/sequenceserver/db/nr.00 => yes
      # /home/ben/pd.ben/sequenceserver/db/nr => no
      # /home/ben/pd.ben/sequenceserver/db/img3.5.finished.faa.01 => yes
      # /home/ben/pd.ben/sequenceserver/db/genome2014 => no
      #
      # Volumes are named `<database>.NN` (or `.NNN`); the dot is required so
      # that names which merely end in digits are not mistaken for a volume.
      def multipart_database_name?(db_name)
        !db_name.match(%r{.+/\S+\.\d{2,3}\z}).nil?
      end

      # Returns true if first character of the file is '>'.
      def probably_fasta?(file)
        File.read(file, 1) == '>'
      end

      # Suggests improved titles when generating database names from files
      # for improved appearance and readability in web interface.
      # For example:
      # Cobs1.4.proteins.fasta -> Cobs 1.4 proteins
      # S_invicta.xx.2.5.small.nucl.fa -> S invicta xx 2.5 small nucl
      #
      # Returns a new String; `db_name` is left untouched (so it may be frozen).
      def make_db_title(db_name)
        title = db_name.gsub('"', "'")
        # removes .fasta like extension names (only from the end of the name)
        extension = File.extname(title)
        title = title.sub(/#{Regexp.escape(extension)}\z/, '')
        # replaces _ with ' ',
        title = title.gsub(/(_)/, ' ')
        # replaces '.' with ' ' when no numbers are on either side,
        title = title.gsub(/(?<![0-9])\.(?![0-9])/, ' ')
        # preserves version numbers
        title.gsub(/\W*(\d+([.-]\d+)+)\W*/, ' \1 ')
      end

      # Guess whether FASTA file contains protein or nucleotide sequences based
      # on first 32768 characters.
      #
      # NOTE: 2^15 == 32768. Approximately 546 lines, assuming 60 characters on
      # each line.
      #
      # Returns the sequence type if all sampled sequences agree on one,
      # `false` otherwise.
      def guess_sequence_type_in_fasta(file)
        # `File.read` with a length returns nil for an empty file.
        sample = File.read(file, 32768).to_s
        sequences = sample.split(/^>.+$/).delete_if { |seq| seq.empty? }
        sequence_types = sequences.map { |seq| Sequence.guess_type(seq) }.uniq.compact
        (sequence_types.length == 1) && sequence_types.first
      end
    end

    # Arguments are the Struct members, in order. Database type is stored
    # lower-cased, all given values are frozen, and `id` is derived from the
    # database name (first argument).
    def initialize(*args)
      # Non-destructive, so a frozen or shared String may be passed in.
      args[2] = args[2].downcase # database type
      args.each(&:freeze)
      super

      @id = Digest::MD5.hexdigest args.first
    end

    # MD5 hex digest of the database name.
    attr_reader :id

    # Returns true if `blastdbcmd` can retrieve the given accession from this
    # database, i.e. prints anything on stdout for it.
    def include?(accession)
      # Array form bypasses the shell: neither the accession nor the database
      # path need quoting.
      command = ['blastdbcmd', '-entry', accession.to_s, '-db', name]
      out = IO.popen(command, :err => File::NULL, &:read)
      !out.empty?
    end

    def to_s
      "#{type}: #{title} #{name}"
    end
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module SequenceServer
  # Module to contain methods for generating sequence retrieval links.
  module Links
    require 'erb'

    # Provide a method to URL encode _query parameters_. See [1].
    include ERB::Util
    alias encode url_encode

    NCBI_ID_PATTERN = /gi\|(\d+)\|/

    # Your custom method should have following pattern:
    #
    # Input
    # -----
    # sequence_id: id of the hit sequence (String)
    #
    # Return
    # ------
    # The return value should be a Hash:
    #
    # {
    #   # Required. Display title.
    #   :title => "title",
    #
    #   # Required. Generated url.
    #   :url => url,
    #
    #   # Optional. Left-right order in which the link should appear.
    #   :order => num,
    #
    #   # Optional. Classes, if any, to apply to the link.
    #   :class => "class1 class2",
    #
    #   # Optional. Class name of a FontAwesome icon to use.
    #   :icon => "fa-icon-class"
    # }
    #
    # If no url could be generated, return nil.
    #
    # Helper methods
    # --------------
    #
    # Following helper methods are available to help with link generation.
    #
    # encode:
    #   URL encode query params.
    #
    #   Don't use this function to encode the entire URL. Only params.
    #
    #   e.g:
    #     sequence_id = encode sequence_id
    #     url = "http://www.ncbi.nlm.nih.gov/nucleotide/#{sequence_id}"
    #
    # querydb:
    #   Returns an array of databases that were used for BLASTing.
    #
    # which_blastdb:
    #   Returns the database from which the given hit came.
    #
    #   e.g:
    #
    #     hit_database = which_blastdb sequence_id
    #
    # Examples:
    # ---------
    # See methods provided by default for an example implementation.

    # Link to view the hit sequence in the browser.
    def sequence_viewer(sequence_id)
      sequence_id = encode sequence_id
      database_ids = encode querydb.map(&:id).join(' ')
      url = "get_sequence/?sequence_ids=#{sequence_id}" \
            "&database_ids=#{database_ids}"

      {
        :order => 0,
        :url => url,
        :title => 'Sequence',
        :class => 'view-sequence',
        :icon => 'fa-eye'
      }
    end

    # Link to download the hit sequence in FASTA format.
    def fasta_download(sequence_id)
      sequence_id = encode sequence_id
      database_ids = encode querydb.map(&:id).join(' ')
      url = "get_sequence/?sequence_ids=#{sequence_id}" \
            "&database_ids=#{database_ids}&download=fasta"

      {
        :order => 1,
        :title => 'FASTA',
        :url => url,
        :icon => 'fa-download'
      }
    end

    # Link to the hit's page on NCBI. Returns nil unless `sequence_id`
    # contains a gi number (see NCBI_ID_PATTERN).
    def ncbi(sequence_id)
      match = sequence_id.match(NCBI_ID_PATTERN)
      return nil unless match

      ncbi_id = encode match[1]
      # The type of the first queried database is used as the path segment
      # of the URL.
      url = "http://www.ncbi.nlm.nih.gov/#{querydb.first.type}/#{ncbi_id}"
      {
        :order => 2,
        :title => 'View on NCBI',
        :url => url,
        :icon => 'fa-external-link'
      }
    end
  end
end
|
113
|
+
|
114
|
+
# [1]: https://stackoverflow.com/questions/2824126/whats-the-difference-between-uri-escape-and-cgi-escape
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module SequenceServer

  # Standard library Logger with SequenceServer's defaults: INFO level
  # (DEBUG if verbose) and a Sinatra-like line format.
  class Logger < Logger

    # dev::     log device, passed on to the standard library Logger.
    # verbose:: log at DEBUG level if true, INFO otherwise.
    def initialize(dev, verbose = false)
      super dev
      self.level = verbose ? DEBUG : INFO
      self.formatter = Formatter.new
    end

    # We change Logging format so that it is consistent with Sinatra's
    class Formatter < Formatter

      # [timestamp] SEVERITY message
      Format = "[%s] %s %s\n".freeze

      def initialize
        # Let the parent set up its own state before customising it.
        super
        self.datetime_format = "%Y-%m-%d %H:%M:%S"
      end

      # Formats one log entry. The program name is not part of the output.
      def call(severity, time, _progname, msg)
        Format % [format_datetime(time), severity, msg2str(msg)]
      end
    end
  end
end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module SequenceServer

  # Provides simple sequence processing utilities via class methods. Instance
  # of the class serves as a simple data object to capture sequences fetched
  # from BLAST databases.
  #
  # NOTE:
  #   What all do we need to consistently construct FASTA from `blastdbcmd's`
  #   output?
  #
  #   It would seem rather straightforward. But it's not.
  #
  #   FASTA format:
  #
  #     >defline
  #     actual sequence
  #
  #   where,
  #
  #     defline = >id title
  #
  #   ID of a sequence fetched from nr database should look like this:
  #
  #                  sequence id   -> self.seqid
  #                 -------------
  #                    accession   -> self.accession
  #                    ----------
  #     gi|322796550|gb|EFZ19024.1| -> self.id
  #        ---------
  #        gi number               -> self.gi
  #
  #   while for local databases, the id should be the exact same, as in the
  #   original FASTA file:
  #
  #     SI2.2.0_06267 -> self.id == self.seqid == self.accession.
  class Sequence < Struct.new(:gi, :seqid, :accession, :title, :value)

    class << self

      extend Forwardable

      # Derive `logger` from SequenceServer module.
      def_delegators SequenceServer, :logger

      # Disable using `Sequence.new`. Should use `Sequence.from_blastdb`
      # instead.
      private :new

      # Returns an Array of `Sequence` objects each capturing a sequence
      # fetched from BLAST database.
      #
      # accessions::   one accession, or an Array of them.
      # database_ids:: one database id, or an Array of them. Ids that are
      #                not registered with `Database` are ignored.
      def from_blastdb(accessions, database_ids)
        accessions     = Array(accessions).join(',')
        database_names = Database[Array(database_ids)].compact.map(&:name).join(' ')

        # Output of the command will be five columns TSV. Columns have to be
        # tab separated because sequence titles contain spaces.
        #
        # The command is an Array so that it is run directly, not through a
        # shell: accessions come from the request and must not be
        # interpolated into a shell string.
        command = ['blastdbcmd', '-outfmt', "%g\t%i\t%a\t%t\t%s",
                   '-db', database_names, '-entry', accessions]

        logger.debug("Executing: #{command.join(' ')}")

        # Not interested in stderr.
        output = IO.popen(command, :err => File::NULL, &:read)
        output.each_line.
          reject { |line| line.strip.empty? }.
          map    { |line| new(*line.chomp.split("\t", -1)) }
      end

      # Strips all non-letter characters. If less than 10 useable characters
      # return `nil`. If more than 90% is ACGTU, returns `:nucleotide`, else
      # `:protein`.
      def guess_type(sequence)
        # Clean the sequence: first remove non-letter characters, then
        # ambiguous characters.
        cleaned_sequence = sequence.gsub(/[^A-Z]/i, '').gsub(/[NX]/i, '')

        return if cleaned_sequence.length < 10 # conservative

        # Count putative NA in the sequence.
        na_count = 0
        composition(cleaned_sequence).each do |character, count|
          na_count += count if character.match(/[ACGTU]/i)
        end

        na_count > (0.9 * cleaned_sequence.length) ? :nucleotide : :protein
      end

      # Copied from BioRuby's `Bio::Sequence` class.
      #
      # > composition("asdfasdfffffasdf")
      # => {"a"=>3, "d"=>3, "f"=>7, "s"=>3}
      def composition(sequence_string)
        count = Hash.new(0)
        sequence_string.scan(/./) do |x|
          count[x] += 1
        end
        count
      end
    end

    # Arguments are the Struct members, in order. A gi of 'N/A' is stored
    # as `nil`.
    def initialize(*args)
      args[0] = nil if args[0] == 'N/A'
      super
    end

    # Returns FASTA sequence id.
    def id
      (gi ? ['gi', gi, seqid] : [seqid]).join('|')
    end

    # Returns length of the sequence.
    def length
      value.length
    end

    # Returns sequence value.
    def to_s
      value
    end

    # Returns sequence value, id and title as a Hash.
    def info
      {:value => value, :id => id, :title => title}
    end

    # Returns FASTA formatted sequence: defline, followed by the sequence
    # wrapped at 60 characters per line.
    def fasta
      chars = 60
      lines = (length / chars.to_f).ceil
      defline = ">#{id} #{title}"
      seqlines = (1..lines).map { |i| to_s[chars * (i - 1), chars] }
      [defline].concat(seqlines).join("\n")
    end
  end
end
|
138
|
+
|
139
|
+
# References
|
140
|
+
# ----------
|
141
|
+
# [1]: http://blast.ncbi.nlm.nih.gov/blastcgihelp.shtml
|