sequenceserver 1.1.0.beta12 → 2.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (208) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec +1 -0
  3. data/.travis.yml +5 -5
  4. data/README.md +86 -43
  5. data/bin/sequenceserver +29 -32
  6. data/lib/sequenceserver.rb +86 -123
  7. data/lib/sequenceserver/api_errors.rb +3 -1
  8. data/lib/sequenceserver/blast/hit.rb +48 -28
  9. data/lib/sequenceserver/blast/job.rb +28 -8
  10. data/lib/sequenceserver/blast/report.rb +52 -15
  11. data/lib/sequenceserver/database.rb +71 -0
  12. data/lib/sequenceserver/exceptions.rb +5 -10
  13. data/lib/sequenceserver/job.rb +8 -1
  14. data/lib/sequenceserver/links.rb +49 -17
  15. data/lib/sequenceserver/logger.rb +2 -2
  16. data/lib/sequenceserver/routes.rb +20 -14
  17. data/lib/sequenceserver/sys.rb +86 -0
  18. data/lib/sequenceserver/version.rb +1 -1
  19. data/public/css/sequenceserver.min.css +3 -3
  20. data/public/js/alignment_exporter.js +1 -1
  21. data/public/js/hsp.js +1 -1
  22. data/public/js/report.js +76 -63
  23. data/public/js/search.js +29 -6
  24. data/public/sequenceserver-report.min.js +4 -4
  25. data/public/sequenceserver-search.min.js +2 -2
  26. data/sequenceserver.gemspec +3 -2
  27. data/spec/blast_versions/blast_2.2.30/blast_2.2.30_spec.rb +228 -0
  28. data/spec/blast_versions/blast_2.2.30/import_spec_capybara_local_2.2.30.rb +53 -0
  29. data/spec/blast_versions/blast_2.2.31/blast_2.2.31_spec.rb +228 -0
  30. data/spec/blast_versions/blast_2.2.31/import_spec_capybara_local_2.2.31.rb +53 -0
  31. data/spec/{blast_spec.rb → blast_versions/blast_2.3.0/blast_2.3.0_spec.rb} +8 -37
  32. data/spec/blast_versions/blast_2.3.0/import_spec_capybara_local_2.3.0.rb +52 -0
  33. data/spec/blast_versions/blast_2.4.0/blast_2.4.0_spec.rb +228 -0
  34. data/spec/blast_versions/blast_2.4.0/import_spec_capybara_local_2.4.0.rb +52 -0
  35. data/spec/blast_versions/blast_2.5.0/blast_2.5.0_spec.rb +228 -0
  36. data/spec/blast_versions/blast_2.5.0/import_spec_capybara_local_2.5.0.rb +53 -0
  37. data/spec/blast_versions/blast_2.6.0/blast_2.6.0_spec.rb +228 -0
  38. data/spec/blast_versions/blast_2.6.0/import_spec_capybara_local_2.6.0.rb +52 -0
  39. data/spec/blast_versions/blast_2.7.1/blast_2.7.1_spec.rb +228 -0
  40. data/spec/blast_versions/blast_2.7.1/import_spec_capybara_local_2.7.1.rb +56 -0
  41. data/spec/blast_versions/blast_2.8.1/blast_2.8.1_spec.rb +228 -0
  42. data/spec/blast_versions/blast_2.8.1/import_spec_capybara_local_2.8.1.rb +55 -0
  43. data/spec/blast_versions/blast_2.9.0/blast_2.9.0_spec.rb +228 -0
  44. data/spec/blast_versions/blast_2.9.0/import_spec_capybara_local_2.9.0.rb +53 -0
  45. data/spec/blast_versions/diamond_0.9.24/diamond_0.9.24_spec.rb +176 -0
  46. data/spec/blast_versions/diamond_0.9.24/import_spec_capybara_local_0.9.24.rb +39 -0
  47. data/spec/capybara_spec.local.rb +190 -0
  48. data/spec/capybara_spec.rb +15 -13
  49. data/spec/config_spec.rb +1 -1
  50. data/spec/database/funky_ids/funky_ids.fa +2 -0
  51. data/spec/database/funky_ids/funky_ids.fa.nhd +8 -0
  52. data/spec/database/funky_ids/funky_ids.fa.nhi +0 -0
  53. data/spec/database/funky_ids/funky_ids.fa.nhr +0 -0
  54. data/spec/database/funky_ids/funky_ids.fa.nin +0 -0
  55. data/spec/database/funky_ids/funky_ids.fa.nog +0 -0
  56. data/spec/database/funky_ids/funky_ids.fa.nsd +15 -13
  57. data/spec/database/funky_ids/funky_ids.fa.nsi +0 -0
  58. data/spec/database/funky_ids/funky_ids.fa.nsq +0 -0
  59. data/spec/database_spec.rb +2 -2
  60. data/spec/dotdir/blast_2.2.30/blastn/BLASTN_XML_2.2.30.xml +1201 -0
  61. data/spec/dotdir/blast_2.2.30/blastn/job.yaml +8 -0
  62. data/spec/dotdir/blast_2.2.30/blastn_nohits/BLASTN_NO_HITS_XML_2.2.30.xml +866 -0
  63. data/spec/dotdir/blast_2.2.30/blastn_nohits/job.yaml +8 -0
  64. data/spec/dotdir/blast_2.2.30/blastp/BLASTP_XML_2.2.30.xml +1181 -0
  65. data/spec/dotdir/blast_2.2.30/blastp/job.yaml +8 -0
  66. data/spec/dotdir/blast_2.2.30/blastx/BLASTX_XML_2.2.30.xml +1181 -0
  67. data/spec/dotdir/blast_2.2.30/blastx/job.yaml +8 -0
  68. data/spec/dotdir/blast_2.2.30/tblastn/TBLASTN_XML_2.2.30.xml +1181 -0
  69. data/spec/dotdir/blast_2.2.30/tblastn/job.yaml +8 -0
  70. data/spec/dotdir/blast_2.2.30/tblastx/TBLASTX_XML_2.2.30.xml +8857 -0
  71. data/spec/dotdir/blast_2.2.30/tblastx/job.yaml +8 -0
  72. data/spec/dotdir/blast_2.2.31/blastn/BLASTN_XML_2.2.31.xml +1201 -0
  73. data/spec/dotdir/blast_2.2.31/blastn/job.yaml +8 -0
  74. data/spec/dotdir/blast_2.2.31/blastn_nohits/BLASTN_NO_HITS_XML_2.2.31.xml +866 -0
  75. data/spec/dotdir/blast_2.2.31/blastn_nohits/job.yaml +8 -0
  76. data/spec/dotdir/blast_2.2.31/blastp/BLASTP_XML_2.2.31.xml +1181 -0
  77. data/spec/dotdir/blast_2.2.31/blastp/job.yaml +8 -0
  78. data/spec/dotdir/blast_2.2.31/blastx/BLASTX_XML_2.2.31.xml +1181 -0
  79. data/spec/dotdir/blast_2.2.31/blastx/job.yaml +8 -0
  80. data/spec/dotdir/blast_2.2.31/tblastn/TBLASTN_XML_2.2.31.xml +1181 -0
  81. data/spec/dotdir/blast_2.2.31/tblastn/job.yaml +8 -0
  82. data/spec/dotdir/blast_2.2.31/tblastx/TBLASTX_XML_2.2.31.xml +8857 -0
  83. data/spec/dotdir/blast_2.2.31/tblastx/job.yaml +8 -0
  84. data/spec/dotdir/blast_2.3.0/blastn/BLASTN_XML_2.3.0.xml +1201 -0
  85. data/spec/dotdir/blast_2.3.0/blastn/job.yaml +8 -0
  86. data/spec/dotdir/blast_2.3.0/blastn_nohits/BLASTN_NO_HITS_XML_2.3.0.xml +866 -0
  87. data/spec/dotdir/blast_2.3.0/blastn_nohits/job.yaml +8 -0
  88. data/spec/dotdir/blast_2.3.0/blastp/BLASTP_XML_2.3.0.xml +1181 -0
  89. data/spec/dotdir/blast_2.3.0/blastp/job.yaml +8 -0
  90. data/spec/dotdir/blast_2.3.0/blastx/BLASTX_XML_2.3.0.xml +1181 -0
  91. data/spec/dotdir/blast_2.3.0/blastx/job.yaml +8 -0
  92. data/spec/dotdir/blast_2.3.0/tblastn/TBLASTN_XML_2.3.0.xml +1181 -0
  93. data/spec/dotdir/blast_2.3.0/tblastn/job.yaml +8 -0
  94. data/spec/dotdir/blast_2.3.0/tblastx/TBLASTX_XML_2.3.0.xml +8857 -0
  95. data/spec/dotdir/blast_2.3.0/tblastx/job.yaml +8 -0
  96. data/spec/dotdir/blast_2.4.0/blastn/BLASTN_XML_2.4.0.xml +1201 -0
  97. data/spec/dotdir/blast_2.4.0/blastn/TBLASTN_XML_2.4.0.xml +1181 -0
  98. data/spec/dotdir/blast_2.4.0/blastn/job.yaml +8 -0
  99. data/spec/dotdir/blast_2.4.0/blastn_nohits/BLASTN_NO_HITS_XML_2.4.0.xml +866 -0
  100. data/spec/dotdir/blast_2.4.0/blastn_nohits/job.yaml +8 -0
  101. data/spec/dotdir/blast_2.4.0/blastp/BLASTP_XML_2.4.0.xml +1181 -0
  102. data/spec/dotdir/blast_2.4.0/blastp/job.yaml +8 -0
  103. data/spec/dotdir/blast_2.4.0/blastx/BLASTX_XML_2.4.0.xml +1181 -0
  104. data/spec/dotdir/blast_2.4.0/blastx/job.yaml +8 -0
  105. data/spec/dotdir/blast_2.4.0/tblastn/TBLASTN_XML_2.4.0.xml +1181 -0
  106. data/spec/dotdir/blast_2.4.0/tblastn/job.yaml +8 -0
  107. data/spec/dotdir/blast_2.4.0/tblastx/TBLASTX_XML_2.4.0.xml +8857 -0
  108. data/spec/dotdir/blast_2.4.0/tblastx/job.yaml +8 -0
  109. data/spec/dotdir/blast_2.5.0/blastn/BLASTN_LONG_XML_2.5.0.xml +18813 -0
  110. data/spec/dotdir/blast_2.5.0/blastn/BLASTN_XML_2.5.0.xml +1201 -0
  111. data/spec/dotdir/blast_2.5.0/blastn/job.yaml +8 -0
  112. data/spec/dotdir/blast_2.5.0/blastn_nohits/BLASTN_NO_HITS_XML_2.5.0.xml +866 -0
  113. data/spec/dotdir/blast_2.5.0/blastn_nohits/job.yaml +8 -0
  114. data/spec/dotdir/blast_2.5.0/blastp/BLASTP +2161 -0
  115. data/spec/dotdir/blast_2.5.0/blastp/BLASTP_XML_2.5.0.xml +1181 -0
  116. data/spec/dotdir/blast_2.5.0/blastp/job.yaml +8 -0
  117. data/spec/dotdir/blast_2.5.0/blastx/BLASTX +28080 -0
  118. data/spec/dotdir/blast_2.5.0/blastx/BLASTX_XML_2.5.0.xml +1181 -0
  119. data/spec/dotdir/blast_2.5.0/blastx/job.yaml +8 -0
  120. data/spec/dotdir/blast_2.5.0/tblastn/TBLASTN +29486 -0
  121. data/spec/dotdir/blast_2.5.0/tblastn/TBLASTN_XML_2.5.0.xml +1181 -0
  122. data/spec/dotdir/blast_2.5.0/tblastn/job.yaml +8 -0
  123. data/spec/dotdir/blast_2.5.0/tblastx/TBLASTX +180859 -0
  124. data/spec/dotdir/blast_2.5.0/tblastx/TBLASTX_XML_2.5.0.xml +8857 -0
  125. data/spec/dotdir/blast_2.5.0/tblastx/job.yaml +8 -0
  126. data/spec/dotdir/blast_2.6.0/blastn/BLASTN_XML_2.6.0.xml +1201 -0
  127. data/spec/dotdir/blast_2.6.0/blastn/job.yaml +8 -0
  128. data/spec/dotdir/blast_2.6.0/blastn_nohits/BLASTN_NO_HITS_XML_2.6.0.xml +866 -0
  129. data/spec/dotdir/blast_2.6.0/blastn_nohits/job.yaml +8 -0
  130. data/spec/dotdir/blast_2.6.0/blastp/BLASTP_XML_2.6.0.xml +1181 -0
  131. data/spec/dotdir/blast_2.6.0/blastp/job.yaml +8 -0
  132. data/spec/dotdir/blast_2.6.0/blastx/BLASTX_XML_2.6.0.xml +1181 -0
  133. data/spec/dotdir/blast_2.6.0/blastx/job.yaml +8 -0
  134. data/spec/dotdir/blast_2.6.0/tblastn/TBLASTN_XML_2.6.0.xml +1181 -0
  135. data/spec/dotdir/blast_2.6.0/tblastn/job.yaml +8 -0
  136. data/spec/dotdir/blast_2.6.0/tblastx/TBLASTX_XML_2.6.0.xml +8857 -0
  137. data/spec/dotdir/blast_2.6.0/tblastx/job.yaml +8 -0
  138. data/spec/dotdir/blast_2.7.1/blastn/BLASTN_XML_2.7.1.xml +1201 -0
  139. data/spec/dotdir/blast_2.7.1/blastn/job.yaml +8 -0
  140. data/spec/dotdir/blast_2.7.1/blastn_nohits/BLASTN_NO_HITS_XML_2.7.1.xml +866 -0
  141. data/spec/dotdir/blast_2.7.1/blastn_nohits/job.yaml +8 -0
  142. data/spec/dotdir/blast_2.7.1/blastp/BLASTP_XML_2.7.1.xml +1181 -0
  143. data/spec/dotdir/blast_2.7.1/blastp/job.yaml +8 -0
  144. data/spec/dotdir/blast_2.7.1/blastx/BLASTX_XML_2.7.1.xml +1181 -0
  145. data/spec/dotdir/blast_2.7.1/blastx/job.yaml +8 -0
  146. data/spec/dotdir/blast_2.7.1/tblastn/TBLASTN_XML_2.7.1.xml +1181 -0
  147. data/spec/dotdir/blast_2.7.1/tblastn/job.yaml +8 -0
  148. data/spec/dotdir/blast_2.7.1/tblastx/TBLASTX_XML_2.7.1.xml +8857 -0
  149. data/spec/dotdir/blast_2.7.1/tblastx/job.yaml +8 -0
  150. data/spec/dotdir/blast_2.8.1/blastn/BLASTN_XML_2.8.1.xml +1201 -0
  151. data/spec/dotdir/blast_2.8.1/blastn/job.yaml +8 -0
  152. data/spec/dotdir/blast_2.8.1/blastn_nohits/BLASTN_NO_HITS_XML_2.8.1.xml +866 -0
  153. data/spec/dotdir/blast_2.8.1/blastn_nohits/job.yaml +8 -0
  154. data/spec/dotdir/blast_2.8.1/blastp/BLASTP_XML_2.8.1.xml +1181 -0
  155. data/spec/dotdir/blast_2.8.1/blastp/job.yaml +8 -0
  156. data/spec/dotdir/blast_2.8.1/blastx/BLASTX_XML_2.8.1.xml +1181 -0
  157. data/spec/dotdir/blast_2.8.1/blastx/job.yaml +8 -0
  158. data/spec/dotdir/blast_2.8.1/tblastn/TBLASTN_XML_2.8.1.xml +1181 -0
  159. data/spec/dotdir/blast_2.8.1/tblastn/job.yaml +8 -0
  160. data/spec/dotdir/blast_2.8.1/tblastx/TBLASTX_XML_2.8.1.xml +8857 -0
  161. data/spec/dotdir/blast_2.8.1/tblastx/job.yaml +8 -0
  162. data/spec/dotdir/blast_2.9.0/blastn/BLASTN_XML_2.9.0.xml +1201 -0
  163. data/spec/dotdir/blast_2.9.0/blastn/job.yaml +8 -0
  164. data/spec/dotdir/blast_2.9.0/blastn_nohits/BLASTN_NO_HITS_XML_2.9.0.xml +866 -0
  165. data/spec/dotdir/blast_2.9.0/blastn_nohits/job.yaml +8 -0
  166. data/spec/dotdir/blast_2.9.0/blastp/BLASTP_XML_2.9.0.xml +1181 -0
  167. data/spec/dotdir/blast_2.9.0/blastp/job.yaml +8 -0
  168. data/spec/dotdir/blast_2.9.0/blastx/BLASTX_XML_2.9.0.xml +1181 -0
  169. data/spec/dotdir/blast_2.9.0/blastx/job.yaml +8 -0
  170. data/spec/dotdir/blast_2.9.0/tblastn/TBLASTN_XML_2.9.0.xml +1181 -0
  171. data/spec/dotdir/blast_2.9.0/tblastn/job.yaml +8 -0
  172. data/spec/dotdir/blast_2.9.0/tblastx/TBLASTX_XML_2.9.0.xml +8857 -0
  173. data/spec/dotdir/blast_2.9.0/tblastx/job.yaml +8 -0
  174. data/spec/dotdir/diamond_0.9.24/blastp/DIAMOND_BLASTP_0.9.24.xml +1040 -0
  175. data/spec/dotdir/diamond_0.9.24/blastp/job.yaml +8 -0
  176. data/spec/dotdir/diamond_0.9.24/blastx/DIAMOND_BLASTX_0.9.24.xml +1040 -0
  177. data/spec/dotdir/diamond_0.9.24/blastx/job.yaml +8 -0
  178. data/spec/dotdir/diamond_0.9.24/blastx_nohits/DIAMOND_BLASTX_NOHITS_0.9.24.xml +41 -0
  179. data/spec/dotdir/diamond_0.9.24/blastx_nohits/job.yaml +8 -0
  180. data/spec/download_helper.rb +32 -0
  181. data/spec/import_spec_capybara_local.rb +61 -0
  182. data/spec/sequence_spec.rb +1 -2
  183. data/spec/sequences/Nucleotide_TP53_COX41.fasta +15 -0
  184. data/spec/sequences/Protein_TP53_COX41.fasta +12 -0
  185. data/spec/sequences/Query_1_SI2_2_0_06267.txt +6 -0
  186. data/spec/{nucleotide_query.fa → sequences/nucleotide_query.fa} +0 -0
  187. data/spec/sequences/problematic_query.fa +5 -0
  188. data/spec/sequences/protein_query.fa +9 -0
  189. data/spec/sequences/sample_query_fire_ant_obps.fa +44 -0
  190. data/spec/sequences/sequenceserver-SI2.2.0_06267.fa +5 -0
  191. data/spec/sequenceserver_spec.rb +3 -3
  192. data/spec/spec_helper.rb +59 -0
  193. metadata +185 -28
  194. data/spec/protein_query.fa +0 -21
  195. data/spec/sample_reports/blastn_sample/job.yaml +0 -10
  196. data/spec/sample_reports/blastn_sample/stdout +0 -144
  197. data/spec/sample_reports/blastp_sample/job.yaml +0 -10
  198. data/spec/sample_reports/blastp_sample/stdout +0 -1187
  199. data/spec/sample_reports/blastx_sample/job.yaml +0 -10
  200. data/spec/sample_reports/blastx_sample/stdout +0 -1191
  201. data/spec/sample_reports/no_hits_sample/job.yaml +0 -10
  202. data/spec/sample_reports/no_hits_sample/stdout +0 -130
  203. data/spec/sample_reports/tblastn_sample/job.yaml +0 -10
  204. data/spec/sample_reports/tblastn_sample/stdout +0 -1107
  205. data/spec/sample_reports/tblastx_sample/job.yaml +0 -10
  206. data/spec/sample_reports/tblastx_sample/stdout +0 -2422
  207. data/spec/sample_reports/with_hits_sample/job.yaml +0 -10
  208. data/spec/sample_reports/with_hits_sample/stdout +0 -1489
@@ -18,7 +18,9 @@ module SequenceServer
18
18
  'The requested job could not be found'
19
19
  end
20
20
 
21
- undef_method :backtrace
21
+ def more_info
22
+ ''
23
+ end
22
24
  end
23
25
 
24
26
  # Errors caused due to incorrect user input.
@@ -1,12 +1,9 @@
1
1
  module SequenceServer
2
2
  # Define BLAST::Hit.
3
3
  module BLAST
4
- # Hit Object to store all the hits per Query. HSPs per hit should be sorted
5
- # in ascending order of evalue.
4
+ # Hit object to store all the hits per Query.
6
5
  Hit = Struct.new(:query, :number, :id, :accession, :title,
7
6
  :length, :sciname, :qcovs, :hsps) do
8
- include Links
9
-
10
7
  def initialize(*args)
11
8
  args[1] = args[1].to_i
12
9
  args[4] = '' if args[4] == 'No definition line'
@@ -16,29 +13,45 @@ module SequenceServer
16
13
  super
17
14
  end
18
15
 
19
- # Hit's score is the sum of score of all HSPs.
20
- def score
21
- hsps.map(&:score).reduce(:+)
16
+ # This gets called when #to_json is called on report object in routes. We
17
+ # cannot use the to_json method provided by Struct class because what we
18
+ # want to send to the browser differs from the attributes declared with
19
+ # Struct class. Some of these are derived data such as score, identity,
20
+ # custom links, while some attributes are necessary for internal
21
+ # representation.
22
+ def to_json(*args)
23
+ # List all attributes that we want to send to the browser.
24
+ properties = %i[number id accession title length score identity
25
+ qcovs sciname evalue hsps links]
26
+ properties.inject({}) { |h, k| h[k] = send(k); h }.to_json(*args)
22
27
  end
23
28
 
24
- # Hit's identity is the sum of identity of all
25
- # HSPs divided by sum of length of all HSPs
26
- # (expressed as percentagge).
27
- def identity
28
- hsps.map(&:identity).reduce(:+) * 100 / hsps.map(&:length).reduce(:+)
29
- end
29
+ ###
30
+ # Link generator functionality.
31
+ ###
32
+
33
+ # Include the Links module.
34
+ include Links
30
35
 
36
+ # Links returns a list of Hashes that can be easily turned into an href
37
+ # in the client. These are derived by calling link generators, that is,
38
+ # instance methods of the Links module.
31
39
  def links
32
40
  links = Links.instance_methods.map { |m| send m }
33
41
  links.compact!
34
42
  links.sort_by { |link| [link[:order], link[:title]] }
35
43
  end
36
44
 
45
+ # Returns the database type (nucleotide or protein).
46
+ def dbtype
47
+ report.dbtype
48
+ end
49
+
37
50
  # Returns a list of databases that contain this hit.
38
51
  #
39
52
  # e.g., whichdb('SI_2.2.23') => [<Database: ...>, ...]
40
53
  def whichdb
41
- querydb.select { |db| db.include? id }
54
+ report.querydb.select { |db| db.include? id }
42
55
  end
43
56
 
44
57
  # Returns tuple of tuple indicating start and end coordinates of matched
@@ -52,26 +65,33 @@ module SequenceServer
52
65
  [[qstart_min, qend_max], [sstart_min, send_max]]
53
66
  end
54
67
 
55
- # NOTE: Evalue of a hit is meaningless. This is here for code that needs
56
- # minimum evalue of all HSPs.
57
- def evalue
58
- hsps.first.evalue
59
- end
68
+ ###
69
+ # Score, identity, and evalue attributes below are used in tabular summary
70
+ # of hits in the HTML report. At some point we should move these to the
71
+ # client.
72
+ ###
60
73
 
61
- def to_json(*args)
62
- %i[number id accession title length score identity qcovs
63
- sciname evalue hsps links].inject({}) { |h, k|
64
- h[k] = send(k)
65
- h
66
- }.to_json(*args)
74
+ # Returns the sum of scores of all HSPs.
75
+ def score
76
+ hsps.map(&:score).reduce(:+)
67
77
  end
68
78
 
69
- private
79
+ # Returns the sum of identity of all HSPs divided by sum of length of all
80
+ # HSPs (expressed as percentage).
81
+ def identity
82
+ hsps.map(&:identity).reduce(:+) * 100 / hsps.map(&:length).reduce(:+)
83
+ end
70
84
 
71
- def querydb
72
- report.querydb
85
+ # Returns the minimum evalue of all HSPs of the Hit. This is shown in the
86
+ # tabular overview of hits in the HTML report.
87
+ def evalue
88
+ hsps.first.evalue
73
89
  end
74
90
 
91
+ private
92
+
93
+ # Returns the report object that this hit is a part of. This is used to
94
+ # access list of databases etc.
75
95
  def report
76
96
  query.report
77
97
  end
@@ -6,13 +6,26 @@ module SequenceServer
6
6
  # Extends SequenceServer::Job to describe a BLAST job.
7
7
  class Job < Job
8
8
  def initialize(params)
9
- validate params
10
- super do
11
- @method = params[:method]
12
- @qfile = store('query.fa', params[:sequence])
13
- @databases = Database[params[:databases]]
14
- @options = params[:advanced].to_s.strip + defaults
15
- @advanced_params = parse_advanced params[:advanced]
9
+ if params.key?(:xml)
10
+ super do
11
+ @imported_xml_file = File.basename params[:xml]
12
+ # Copy over the XML file to job directory so that a job dir in
13
+ # itself is self-contained. This will help with tests among
14
+ # other things.
15
+ FileUtils.cp(params[:xml], dir)
16
+ @advanced_params = {}
17
+ @databases = []
18
+ done!
19
+ end
20
+ else
21
+ validate params
22
+ super do
23
+ @method = params[:method]
24
+ @qfile = store('query.fa', params[:sequence])
25
+ @databases = Database[params[:databases]]
26
+ @options = params[:advanced].to_s.strip + defaults
27
+ @advanced_params = parse_advanced params[:advanced]
28
+ end
16
29
  end
17
30
  end
18
31
 
@@ -22,6 +35,13 @@ module SequenceServer
22
35
  # Attributes used by us - should be considered private.
23
36
  attr_reader :method, :qfile, :databases, :options
24
37
 
38
+ # :nodoc:
39
+ # Returns path to the imported xml file if the job was created using the
40
+ # --import switch. Returns nil otherwise.
41
+ def imported_xml_file
42
+ File.join(dir, @imported_xml_file) if @imported_xml_file
43
+ end
44
+
25
45
  # Returns the command that will be executed. Job super class takes care
26
46
  # of actual execution.
27
47
  def command
@@ -146,7 +166,7 @@ module SequenceServer
146
166
  end
147
167
 
148
168
  def disallowed_options
149
- /-out|-html|-outfmt|-db|-query|-num_threads/i
169
+ /-out|-html|-outfmt|-db |-query|-num_threads/i
150
170
  end
151
171
  end
152
172
  end
@@ -29,18 +29,38 @@ module SequenceServer
29
29
  end
30
30
  end
31
31
 
32
- # :nodoc:
33
32
  # Attributes parsed out from XML output.
34
- attr_reader :program, :program_version
35
- attr_reader :queries, :querydb
36
- attr_reader :params, :stats
33
+ attr_reader :program, :program_version, :params, :stats, :queries
34
+
35
+ # This is obtained from the job object.
36
+ attr_reader :querydb
37
+
38
+ # Returns database type (nucleotide or protein) used for running BLAST
39
+ # search. If we ran the BLAST search, this information is available
40
+ # from Job#databases. For imported XML, this is inferred from
41
+ # Report#program (i.e., the BLAST algorithm)
42
+ def dbtype
43
+ return @dbtype if @dbtype
44
+ @dbtype = if @querydb.empty?
45
+ case program
46
+ when /blastn|tblastn|tblastx/
47
+ 'nucleotide'
48
+ when /blastp|blastx/
49
+ 'protein'
50
+ end
51
+ else
52
+ @querydb.first.type
53
+ end
54
+ end
37
55
 
38
56
  def to_json
39
57
  [:querydb, :program, :program_version, :params, :stats,
40
58
  :queries].inject({}) { |h, k|
41
59
  h[k] = send(k)
42
60
  h
43
- }.update(search_id: job.id, submitted_at: job.submitted_at.utc).to_json
61
+ }.update(search_id: job.id,
62
+ submitted_at: job.submitted_at.utc,
63
+ imported_xml: !!job.imported_xml_file).to_json
44
64
  end
45
65
 
46
66
  private
@@ -48,12 +68,25 @@ module SequenceServer
48
68
  # Generate report.
49
69
  def generate
50
70
  job.raise!
51
- xml_ir = parse_xml File.read(Formatter.run(job, 'xml').file)
52
- tsv_ir = parse_tsv File.read(Formatter.run(job, 'custom_tsv').file)
53
- extract_program_info xml_ir
54
- extract_params xml_ir
55
- extract_stats xml_ir
56
- extract_queries xml_ir, tsv_ir
71
+ if job.imported_xml_file
72
+ xml_ir = parse_xml File.read(job.imported_xml_file)
73
+ tsv_ir = Hash.new do |h1,k1|
74
+ h1[k1] = Hash.new do |h2,k2|
75
+ h2[k2]=['','',[]]
76
+ end
77
+ end
78
+ extract_program_info xml_ir
79
+ extract_params xml_ir
80
+ extract_stats xml_ir
81
+ extract_queries xml_ir, tsv_ir
82
+ else
83
+ xml_ir = parse_xml File.read(Formatter.run(job, 'xml').file)
84
+ tsv_ir = parse_tsv File.read(Formatter.run(job, 'custom_tsv').file )
85
+ extract_program_info xml_ir
86
+ extract_params xml_ir
87
+ extract_stats xml_ir
88
+ extract_queries xml_ir, tsv_ir
89
+ end
57
90
  end
58
91
 
59
92
  # Make program name and program name + version available via `program`
@@ -107,10 +140,14 @@ module SequenceServer
107
140
  def extract_hits(xml_ir, tsv_ir, query)
108
141
  return if xml_ir == ["\n"] # => No hits.
109
142
  xml_ir.each do |n|
110
- # If hit comes from a non -parse_seqids database, then
111
- # we assign id to accession and process hit defline to
112
- # obtain id and title.
113
- if n[1] =~ /^gnl\|/
143
+ # If hit comes from a non -parse_seqids database, then id (n[1]) is a
144
+ # BLAST assigned internal id of the format 'gnl|BL_ORD_ID|serial'. We
145
+ # assign the id to accession (because we use accession for sequence
146
+ # retrieval and this id is what blastdbcmd expects for non
147
+ # -parse_seqids databases) and parse the hit defline to
148
+ # obtain id and title ourselves (we use id and title
149
+ # for display purposes).
150
+ if n[1] =~ /^gnl\|BL_ORD_ID\|\d+/
114
151
  n[3] = n[1]
115
152
  defline = n[2].split
116
153
  n[1] = defline.shift
@@ -36,6 +36,19 @@ module SequenceServer
36
36
 
37
37
  attr_reader :id
38
38
 
39
+ def retrieve(accession, coords = nil)
40
+ cmd = "blastdbcmd -db #{name} -entry '#{accession}'"
41
+ if coords
42
+ cmd << " -range #{coords}"
43
+ end
44
+ out, = sys(cmd, path: config[:bin])
45
+ out.chomp
46
+ rescue CommandFailed
47
+ # Command failed because stdout was empty, meaning accession not
48
+ # present in this database.
49
+ nil
50
+ end
51
+
39
52
  def include?(accession)
40
53
  cmd = "blastdbcmd -entry '#{accession}' -db #{name}"
41
54
  out, = sys(cmd, path: config[:bin])
@@ -103,6 +116,64 @@ module SequenceServer
103
116
  collection.values.to_json
104
117
  end
105
118
 
119
+ # Retrieve given loci from the databases we have.
120
+ #
121
+ # loci to retrieve are specified as a String:
122
+ #
123
+ # "accession_1,accession_2:start-stop,accession_3"
124
+ #
125
+ # Return value is a FASTA format String containing sequences in the same
126
+ # order in which they were requested. If an accession could not be found,
127
+ # a commented out error message is included in place of the sequence.
128
+ # Sequences are retrieved from the first database in which the accession
129
+ # is found. The returned sequences can, thus, be incorrect if accessions
130
+ # are not unique across all database (admins should make sure of that).
131
+ def retrieve(loci)
132
+ # Exit early if loci is nil.
133
+ return unless loci
134
+
135
+ # String -> Array
136
+ # We may have empty string if loci contains a double comma as a result
137
+ # of typo (remember - loci is external input). These are eliminated.
138
+ loci = loci.split(',').delete_if(&:empty?)
139
+
140
+ # Each database is searched for each locus. For each locus, search is
141
+ # terminated on the first database match.
142
+ # NOTE: This can return incorrect sequence if the sequence ids are
143
+ # not unique across all databases.
144
+ seqs = loci.map do |locus|
145
+ # Get sequence id and coords. coords may be nil. accession can't
146
+ # be.
147
+ accession, coords = locus.split(':')
148
+
149
+ # Initialise a variable to store retrieved sequence.
150
+ seq = nil
151
+
152
+ # Go over each database looking for this accession.
153
+ each do |database|
154
+ # Database lookup will return a string if given accession is
155
+ # present in the database, nil otherwise.
156
+ seq = database.retrieve(accession, coords)
157
+ # Found a match! Terminate iteration returning the retrieved
158
+ # sequence.
159
+ break if seq
160
+ end
161
+
162
+ # If accession was not present in any database, insert an error
163
+ # message in place of the sequence. The line starts with '#'
164
+ # and should be ignored by BLAST (not tested).
165
+ unless seq
166
+ seq = "# ERROR: #{locus} not found in any database"
167
+ end
168
+
169
+ # Return seq.
170
+ seq
171
+ end
172
+
173
+ # Array -> String
174
+ seqs.join("\n")
175
+ end
176
+
106
177
  # Intended to be used only for testing.
107
178
  def first
108
179
  all.first
@@ -61,21 +61,16 @@ module SequenceServer
61
61
  end
62
62
  end
63
63
 
64
- ## BLAST NOT INSTALLED OR NOT COMPATIBLE ##
64
+ ## BLAST NOT INSTALLED, NOT EXECUTABLE, OR NOT COMPATIBLE ##
65
65
 
66
66
  # Raised if SequenceServer could not locate NCBI BLAST+ installation on
67
67
  # user's system.
68
68
  class BLAST_NOT_INSTALLED_OR_NOT_EXECUTABLE < StandardError
69
69
  def to_s
70
- 'BLAST not installed, or is not executable.'
71
- end
72
- end
73
-
74
- # Raised if SequenceServer could not successfully execute 'blastp -version'
75
- # on user's system (see #141).
76
- class BLAST_NOT_EXECUTABLE < StandardError
77
- def to_s
78
- 'Error executing BLAST+ binaries.'
70
+ <<~MSG
71
+ BLAST+ is either not installed, or there is a problem with the
72
+ installed version.
73
+ MSG
79
74
  end
80
75
  end
81
76
 
@@ -25,7 +25,7 @@ module SequenceServer
25
25
  # Creates and queues a job. Returns created job object.
26
26
  def create(params)
27
27
  job = BLAST::Job.new(params) # TODO: Dynamic dispatch.
28
- SequenceServer.pool.queue { job.run }
28
+ pool.queue { job.run }
29
29
  job
30
30
  end
31
31
 
@@ -46,6 +46,13 @@ module SequenceServer
46
46
  Dir["#{DOTDIR}/**/job.yaml"]
47
47
  .map { |f| fetch File.basename File.dirname f }
48
48
  end
49
+
50
+ private
51
+
52
+ # Thread pool used for running BLAST searches.
53
+ def pool
54
+ @pool ||= Pool.new(SequenceServer.config[:num_threads])
55
+ end
49
56
  end
50
57
 
51
58
  include FileUtils
@@ -9,8 +9,10 @@ module SequenceServer
9
9
 
10
10
  NCBI_ID_PATTERN = /gi\|(\d+)\|/
11
11
  UNIPROT_ID_PATTERN = /sp\|(\w+)\|/
12
+ PFAM_ID_PATTERN = /(PF\d{5}\.?\d*)/
13
+ RFAM_ID_PATTERN = /(RF\d{5})/
12
14
 
13
- # Link generators return a Hash like below.
15
+ # Link generators are methods that return a Hash as defined below.
14
16
  #
15
17
  # {
16
18
  # # Required. Display title.
@@ -45,25 +47,29 @@ module SequenceServer
45
47
  # sequence_id = encode sequence_id
46
48
  # url = "http://www.ncbi.nlm.nih.gov/nucleotide/#{sequence_id}"
47
49
  #
48
- # querydb:
49
- # Returns an array of databases that were used for BLASTing.
50
+ # dbtype:
51
+ # Returns the database type (nucleotide or protein) that was used for
52
+ # BLAST search.
50
53
  #
51
54
  # whichdb:
52
- # Returns the database from which the given hit came from.
53
- #
54
- # e.g:
55
- #
56
- # hit_database = whichdb
57
- #
58
- # Examples:
59
- # ---------
60
- # See methods provided by default for an example implementation.
55
+ # Returns the databases from which the hit could have originated. To
56
+ # ensure that exactly one (the correct) database is returned, make sure that
57
+ # your sequence ids are unique across different FASTA files.
58
+ # NOTE: This method is slow.
59
+ #
60
+ # coordinates:
61
+ # Returns min alignment start and max alignment end coordinates for
62
+ # query and hit sequences.
63
+ #
64
+ # e.g.,
65
+ # query_coords = coordinates[0]
66
+ # hit_coords = coordinates[1]
61
67
 
62
68
  def ncbi
63
- return nil unless id.match(NCBI_ID_PATTERN)
69
+ return nil unless id.match(NCBI_ID_PATTERN) or title.match(NCBI_ID_PATTERN)
64
70
  ncbi_id = Regexp.last_match[1]
65
71
  ncbi_id = encode ncbi_id
66
- url = "http://www.ncbi.nlm.nih.gov/#{querydb.first.type}/#{ncbi_id}"
72
+ url = "https://www.ncbi.nlm.nih.gov/#{dbtype}/#{ncbi_id}"
67
73
  {
68
74
  order: 2,
69
75
  title: 'NCBI',
@@ -73,13 +79,39 @@ module SequenceServer
73
79
  end
74
80
 
75
81
  def uniprot
76
- return nil unless id.match(UNIPROT_ID_PATTERN)
82
+ return nil unless id.match(UNIPROT_ID_PATTERN) or title.match(UNIPROT_ID_PATTERN)
77
83
  uniprot_id = Regexp.last_match[1]
78
84
  uniprot_id = encode uniprot_id
79
- url = "http://www.uniprot.org/uniprot/#{uniprot_id}"
85
+ url = "https://www.uniprot.org/uniprot/#{uniprot_id}"
86
+ {
87
+ order: 2,
88
+ title: 'UniProt',
89
+ url: url,
90
+ icon: 'fa-external-link'
91
+ }
92
+ end
93
+
94
+ def pfam
95
+ return nil unless id.match(PFAM_ID_PATTERN) or title.match(PFAM_ID_PATTERN)
96
+ pfam_id = Regexp.last_match[1]
97
+ pfam_id = encode pfam_id
98
+ url = "https://pfam.xfam.org/family/#{pfam_id}"
99
+ {
100
+ order: 2,
101
+ title: 'Pfam',
102
+ url: url,
103
+ icon: 'fa-external-link'
104
+ }
105
+ end
106
+
107
+ def rfam
108
+ return nil unless id.match(RFAM_ID_PATTERN) or title.match(RFAM_ID_PATTERN)
109
+ rfam_id = Regexp.last_match[1]
110
+ rfam_id = encode rfam_id
111
+ url = "https://rfam.xfam.org/family/#{rfam_id}"
80
112
  {
81
113
  order: 2,
82
- title: 'Uniprot',
114
+ title: 'Rfam',
83
115
  url: url,
84
116
  icon: 'fa-external-link'
85
117
  }