sequenceserver 1.1.0.beta12 → 2.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sequenceserver might be problematic. Click here for more details.

Files changed (208) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec +1 -0
  3. data/.travis.yml +5 -5
  4. data/README.md +86 -43
  5. data/bin/sequenceserver +29 -32
  6. data/lib/sequenceserver.rb +86 -123
  7. data/lib/sequenceserver/api_errors.rb +3 -1
  8. data/lib/sequenceserver/blast/hit.rb +48 -28
  9. data/lib/sequenceserver/blast/job.rb +28 -8
  10. data/lib/sequenceserver/blast/report.rb +52 -15
  11. data/lib/sequenceserver/database.rb +71 -0
  12. data/lib/sequenceserver/exceptions.rb +5 -10
  13. data/lib/sequenceserver/job.rb +8 -1
  14. data/lib/sequenceserver/links.rb +49 -17
  15. data/lib/sequenceserver/logger.rb +2 -2
  16. data/lib/sequenceserver/routes.rb +20 -14
  17. data/lib/sequenceserver/sys.rb +86 -0
  18. data/lib/sequenceserver/version.rb +1 -1
  19. data/public/css/sequenceserver.min.css +3 -3
  20. data/public/js/alignment_exporter.js +1 -1
  21. data/public/js/hsp.js +1 -1
  22. data/public/js/report.js +76 -63
  23. data/public/js/search.js +29 -6
  24. data/public/sequenceserver-report.min.js +4 -4
  25. data/public/sequenceserver-search.min.js +2 -2
  26. data/sequenceserver.gemspec +3 -2
  27. data/spec/blast_versions/blast_2.2.30/blast_2.2.30_spec.rb +228 -0
  28. data/spec/blast_versions/blast_2.2.30/import_spec_capybara_local_2.2.30.rb +53 -0
  29. data/spec/blast_versions/blast_2.2.31/blast_2.2.31_spec.rb +228 -0
  30. data/spec/blast_versions/blast_2.2.31/import_spec_capybara_local_2.2.31.rb +53 -0
  31. data/spec/{blast_spec.rb → blast_versions/blast_2.3.0/blast_2.3.0_spec.rb} +8 -37
  32. data/spec/blast_versions/blast_2.3.0/import_spec_capybara_local_2.3.0.rb +52 -0
  33. data/spec/blast_versions/blast_2.4.0/blast_2.4.0_spec.rb +228 -0
  34. data/spec/blast_versions/blast_2.4.0/import_spec_capybara_local_2.4.0.rb +52 -0
  35. data/spec/blast_versions/blast_2.5.0/blast_2.5.0_spec.rb +228 -0
  36. data/spec/blast_versions/blast_2.5.0/import_spec_capybara_local_2.5.0.rb +53 -0
  37. data/spec/blast_versions/blast_2.6.0/blast_2.6.0_spec.rb +228 -0
  38. data/spec/blast_versions/blast_2.6.0/import_spec_capybara_local_2.6.0.rb +52 -0
  39. data/spec/blast_versions/blast_2.7.1/blast_2.7.1_spec.rb +228 -0
  40. data/spec/blast_versions/blast_2.7.1/import_spec_capybara_local_2.7.1.rb +56 -0
  41. data/spec/blast_versions/blast_2.8.1/blast_2.8.1_spec.rb +228 -0
  42. data/spec/blast_versions/blast_2.8.1/import_spec_capybara_local_2.8.1.rb +55 -0
  43. data/spec/blast_versions/blast_2.9.0/blast_2.9.0_spec.rb +228 -0
  44. data/spec/blast_versions/blast_2.9.0/import_spec_capybara_local_2.9.0.rb +53 -0
  45. data/spec/blast_versions/diamond_0.9.24/diamond_0.9.24_spec.rb +176 -0
  46. data/spec/blast_versions/diamond_0.9.24/import_spec_capybara_local_0.9.24.rb +39 -0
  47. data/spec/capybara_spec.local.rb +190 -0
  48. data/spec/capybara_spec.rb +15 -13
  49. data/spec/config_spec.rb +1 -1
  50. data/spec/database/funky_ids/funky_ids.fa +2 -0
  51. data/spec/database/funky_ids/funky_ids.fa.nhd +8 -0
  52. data/spec/database/funky_ids/funky_ids.fa.nhi +0 -0
  53. data/spec/database/funky_ids/funky_ids.fa.nhr +0 -0
  54. data/spec/database/funky_ids/funky_ids.fa.nin +0 -0
  55. data/spec/database/funky_ids/funky_ids.fa.nog +0 -0
  56. data/spec/database/funky_ids/funky_ids.fa.nsd +15 -13
  57. data/spec/database/funky_ids/funky_ids.fa.nsi +0 -0
  58. data/spec/database/funky_ids/funky_ids.fa.nsq +0 -0
  59. data/spec/database_spec.rb +2 -2
  60. data/spec/dotdir/blast_2.2.30/blastn/BLASTN_XML_2.2.30.xml +1201 -0
  61. data/spec/dotdir/blast_2.2.30/blastn/job.yaml +8 -0
  62. data/spec/dotdir/blast_2.2.30/blastn_nohits/BLASTN_NO_HITS_XML_2.2.30.xml +866 -0
  63. data/spec/dotdir/blast_2.2.30/blastn_nohits/job.yaml +8 -0
  64. data/spec/dotdir/blast_2.2.30/blastp/BLASTP_XML_2.2.30.xml +1181 -0
  65. data/spec/dotdir/blast_2.2.30/blastp/job.yaml +8 -0
  66. data/spec/dotdir/blast_2.2.30/blastx/BLASTX_XML_2.2.30.xml +1181 -0
  67. data/spec/dotdir/blast_2.2.30/blastx/job.yaml +8 -0
  68. data/spec/dotdir/blast_2.2.30/tblastn/TBLASTN_XML_2.2.30.xml +1181 -0
  69. data/spec/dotdir/blast_2.2.30/tblastn/job.yaml +8 -0
  70. data/spec/dotdir/blast_2.2.30/tblastx/TBLASTX_XML_2.2.30.xml +8857 -0
  71. data/spec/dotdir/blast_2.2.30/tblastx/job.yaml +8 -0
  72. data/spec/dotdir/blast_2.2.31/blastn/BLASTN_XML_2.2.31.xml +1201 -0
  73. data/spec/dotdir/blast_2.2.31/blastn/job.yaml +8 -0
  74. data/spec/dotdir/blast_2.2.31/blastn_nohits/BLASTN_NO_HITS_XML_2.2.31.xml +866 -0
  75. data/spec/dotdir/blast_2.2.31/blastn_nohits/job.yaml +8 -0
  76. data/spec/dotdir/blast_2.2.31/blastp/BLASTP_XML_2.2.31.xml +1181 -0
  77. data/spec/dotdir/blast_2.2.31/blastp/job.yaml +8 -0
  78. data/spec/dotdir/blast_2.2.31/blastx/BLASTX_XML_2.2.31.xml +1181 -0
  79. data/spec/dotdir/blast_2.2.31/blastx/job.yaml +8 -0
  80. data/spec/dotdir/blast_2.2.31/tblastn/TBLASTN_XML_2.2.31.xml +1181 -0
  81. data/spec/dotdir/blast_2.2.31/tblastn/job.yaml +8 -0
  82. data/spec/dotdir/blast_2.2.31/tblastx/TBLASTX_XML_2.2.31.xml +8857 -0
  83. data/spec/dotdir/blast_2.2.31/tblastx/job.yaml +8 -0
  84. data/spec/dotdir/blast_2.3.0/blastn/BLASTN_XML_2.3.0.xml +1201 -0
  85. data/spec/dotdir/blast_2.3.0/blastn/job.yaml +8 -0
  86. data/spec/dotdir/blast_2.3.0/blastn_nohits/BLASTN_NO_HITS_XML_2.3.0.xml +866 -0
  87. data/spec/dotdir/blast_2.3.0/blastn_nohits/job.yaml +8 -0
  88. data/spec/dotdir/blast_2.3.0/blastp/BLASTP_XML_2.3.0.xml +1181 -0
  89. data/spec/dotdir/blast_2.3.0/blastp/job.yaml +8 -0
  90. data/spec/dotdir/blast_2.3.0/blastx/BLASTX_XML_2.3.0.xml +1181 -0
  91. data/spec/dotdir/blast_2.3.0/blastx/job.yaml +8 -0
  92. data/spec/dotdir/blast_2.3.0/tblastn/TBLASTN_XML_2.3.0.xml +1181 -0
  93. data/spec/dotdir/blast_2.3.0/tblastn/job.yaml +8 -0
  94. data/spec/dotdir/blast_2.3.0/tblastx/TBLASTX_XML_2.3.0.xml +8857 -0
  95. data/spec/dotdir/blast_2.3.0/tblastx/job.yaml +8 -0
  96. data/spec/dotdir/blast_2.4.0/blastn/BLASTN_XML_2.4.0.xml +1201 -0
  97. data/spec/dotdir/blast_2.4.0/blastn/TBLASTN_XML_2.4.0.xml +1181 -0
  98. data/spec/dotdir/blast_2.4.0/blastn/job.yaml +8 -0
  99. data/spec/dotdir/blast_2.4.0/blastn_nohits/BLASTN_NO_HITS_XML_2.4.0.xml +866 -0
  100. data/spec/dotdir/blast_2.4.0/blastn_nohits/job.yaml +8 -0
  101. data/spec/dotdir/blast_2.4.0/blastp/BLASTP_XML_2.4.0.xml +1181 -0
  102. data/spec/dotdir/blast_2.4.0/blastp/job.yaml +8 -0
  103. data/spec/dotdir/blast_2.4.0/blastx/BLASTX_XML_2.4.0.xml +1181 -0
  104. data/spec/dotdir/blast_2.4.0/blastx/job.yaml +8 -0
  105. data/spec/dotdir/blast_2.4.0/tblastn/TBLASTN_XML_2.4.0.xml +1181 -0
  106. data/spec/dotdir/blast_2.4.0/tblastn/job.yaml +8 -0
  107. data/spec/dotdir/blast_2.4.0/tblastx/TBLASTX_XML_2.4.0.xml +8857 -0
  108. data/spec/dotdir/blast_2.4.0/tblastx/job.yaml +8 -0
  109. data/spec/dotdir/blast_2.5.0/blastn/BLASTN_LONG_XML_2.5.0.xml +18813 -0
  110. data/spec/dotdir/blast_2.5.0/blastn/BLASTN_XML_2.5.0.xml +1201 -0
  111. data/spec/dotdir/blast_2.5.0/blastn/job.yaml +8 -0
  112. data/spec/dotdir/blast_2.5.0/blastn_nohits/BLASTN_NO_HITS_XML_2.5.0.xml +866 -0
  113. data/spec/dotdir/blast_2.5.0/blastn_nohits/job.yaml +8 -0
  114. data/spec/dotdir/blast_2.5.0/blastp/BLASTP +2161 -0
  115. data/spec/dotdir/blast_2.5.0/blastp/BLASTP_XML_2.5.0.xml +1181 -0
  116. data/spec/dotdir/blast_2.5.0/blastp/job.yaml +8 -0
  117. data/spec/dotdir/blast_2.5.0/blastx/BLASTX +28080 -0
  118. data/spec/dotdir/blast_2.5.0/blastx/BLASTX_XML_2.5.0.xml +1181 -0
  119. data/spec/dotdir/blast_2.5.0/blastx/job.yaml +8 -0
  120. data/spec/dotdir/blast_2.5.0/tblastn/TBLASTN +29486 -0
  121. data/spec/dotdir/blast_2.5.0/tblastn/TBLASTN_XML_2.5.0.xml +1181 -0
  122. data/spec/dotdir/blast_2.5.0/tblastn/job.yaml +8 -0
  123. data/spec/dotdir/blast_2.5.0/tblastx/TBLASTX +180859 -0
  124. data/spec/dotdir/blast_2.5.0/tblastx/TBLASTX_XML_2.5.0.xml +8857 -0
  125. data/spec/dotdir/blast_2.5.0/tblastx/job.yaml +8 -0
  126. data/spec/dotdir/blast_2.6.0/blastn/BLASTN_XML_2.6.0.xml +1201 -0
  127. data/spec/dotdir/blast_2.6.0/blastn/job.yaml +8 -0
  128. data/spec/dotdir/blast_2.6.0/blastn_nohits/BLASTN_NO_HITS_XML_2.6.0.xml +866 -0
  129. data/spec/dotdir/blast_2.6.0/blastn_nohits/job.yaml +8 -0
  130. data/spec/dotdir/blast_2.6.0/blastp/BLASTP_XML_2.6.0.xml +1181 -0
  131. data/spec/dotdir/blast_2.6.0/blastp/job.yaml +8 -0
  132. data/spec/dotdir/blast_2.6.0/blastx/BLASTX_XML_2.6.0.xml +1181 -0
  133. data/spec/dotdir/blast_2.6.0/blastx/job.yaml +8 -0
  134. data/spec/dotdir/blast_2.6.0/tblastn/TBLASTN_XML_2.6.0.xml +1181 -0
  135. data/spec/dotdir/blast_2.6.0/tblastn/job.yaml +8 -0
  136. data/spec/dotdir/blast_2.6.0/tblastx/TBLASTX_XML_2.6.0.xml +8857 -0
  137. data/spec/dotdir/blast_2.6.0/tblastx/job.yaml +8 -0
  138. data/spec/dotdir/blast_2.7.1/blastn/BLASTN_XML_2.7.1.xml +1201 -0
  139. data/spec/dotdir/blast_2.7.1/blastn/job.yaml +8 -0
  140. data/spec/dotdir/blast_2.7.1/blastn_nohits/BLASTN_NO_HITS_XML_2.7.1.xml +866 -0
  141. data/spec/dotdir/blast_2.7.1/blastn_nohits/job.yaml +8 -0
  142. data/spec/dotdir/blast_2.7.1/blastp/BLASTP_XML_2.7.1.xml +1181 -0
  143. data/spec/dotdir/blast_2.7.1/blastp/job.yaml +8 -0
  144. data/spec/dotdir/blast_2.7.1/blastx/BLASTX_XML_2.7.1.xml +1181 -0
  145. data/spec/dotdir/blast_2.7.1/blastx/job.yaml +8 -0
  146. data/spec/dotdir/blast_2.7.1/tblastn/TBLASTN_XML_2.7.1.xml +1181 -0
  147. data/spec/dotdir/blast_2.7.1/tblastn/job.yaml +8 -0
  148. data/spec/dotdir/blast_2.7.1/tblastx/TBLASTX_XML_2.7.1.xml +8857 -0
  149. data/spec/dotdir/blast_2.7.1/tblastx/job.yaml +8 -0
  150. data/spec/dotdir/blast_2.8.1/blastn/BLASTN_XML_2.8.1.xml +1201 -0
  151. data/spec/dotdir/blast_2.8.1/blastn/job.yaml +8 -0
  152. data/spec/dotdir/blast_2.8.1/blastn_nohits/BLASTN_NO_HITS_XML_2.8.1.xml +866 -0
  153. data/spec/dotdir/blast_2.8.1/blastn_nohits/job.yaml +8 -0
  154. data/spec/dotdir/blast_2.8.1/blastp/BLASTP_XML_2.8.1.xml +1181 -0
  155. data/spec/dotdir/blast_2.8.1/blastp/job.yaml +8 -0
  156. data/spec/dotdir/blast_2.8.1/blastx/BLASTX_XML_2.8.1.xml +1181 -0
  157. data/spec/dotdir/blast_2.8.1/blastx/job.yaml +8 -0
  158. data/spec/dotdir/blast_2.8.1/tblastn/TBLASTN_XML_2.8.1.xml +1181 -0
  159. data/spec/dotdir/blast_2.8.1/tblastn/job.yaml +8 -0
  160. data/spec/dotdir/blast_2.8.1/tblastx/TBLASTX_XML_2.8.1.xml +8857 -0
  161. data/spec/dotdir/blast_2.8.1/tblastx/job.yaml +8 -0
  162. data/spec/dotdir/blast_2.9.0/blastn/BLASTN_XML_2.9.0.xml +1201 -0
  163. data/spec/dotdir/blast_2.9.0/blastn/job.yaml +8 -0
  164. data/spec/dotdir/blast_2.9.0/blastn_nohits/BLASTN_NO_HITS_XML_2.9.0.xml +866 -0
  165. data/spec/dotdir/blast_2.9.0/blastn_nohits/job.yaml +8 -0
  166. data/spec/dotdir/blast_2.9.0/blastp/BLASTP_XML_2.9.0.xml +1181 -0
  167. data/spec/dotdir/blast_2.9.0/blastp/job.yaml +8 -0
  168. data/spec/dotdir/blast_2.9.0/blastx/BLASTX_XML_2.9.0.xml +1181 -0
  169. data/spec/dotdir/blast_2.9.0/blastx/job.yaml +8 -0
  170. data/spec/dotdir/blast_2.9.0/tblastn/TBLASTN_XML_2.9.0.xml +1181 -0
  171. data/spec/dotdir/blast_2.9.0/tblastn/job.yaml +8 -0
  172. data/spec/dotdir/blast_2.9.0/tblastx/TBLASTX_XML_2.9.0.xml +8857 -0
  173. data/spec/dotdir/blast_2.9.0/tblastx/job.yaml +8 -0
  174. data/spec/dotdir/diamond_0.9.24/blastp/DIAMOND_BLASTP_0.9.24.xml +1040 -0
  175. data/spec/dotdir/diamond_0.9.24/blastp/job.yaml +8 -0
  176. data/spec/dotdir/diamond_0.9.24/blastx/DIAMOND_BLASTX_0.9.24.xml +1040 -0
  177. data/spec/dotdir/diamond_0.9.24/blastx/job.yaml +8 -0
  178. data/spec/dotdir/diamond_0.9.24/blastx_nohits/DIAMOND_BLASTX_NOHITS_0.9.24.xml +41 -0
  179. data/spec/dotdir/diamond_0.9.24/blastx_nohits/job.yaml +8 -0
  180. data/spec/download_helper.rb +32 -0
  181. data/spec/import_spec_capybara_local.rb +61 -0
  182. data/spec/sequence_spec.rb +1 -2
  183. data/spec/sequences/Nucleotide_TP53_COX41.fasta +15 -0
  184. data/spec/sequences/Protein_TP53_COX41.fasta +12 -0
  185. data/spec/sequences/Query_1_SI2_2_0_06267.txt +6 -0
  186. data/spec/{nucleotide_query.fa → sequences/nucleotide_query.fa} +0 -0
  187. data/spec/sequences/problematic_query.fa +5 -0
  188. data/spec/sequences/protein_query.fa +9 -0
  189. data/spec/sequences/sample_query_fire_ant_obps.fa +44 -0
  190. data/spec/sequences/sequenceserver-SI2.2.0_06267.fa +5 -0
  191. data/spec/sequenceserver_spec.rb +3 -3
  192. data/spec/spec_helper.rb +59 -0
  193. metadata +185 -28
  194. data/spec/protein_query.fa +0 -21
  195. data/spec/sample_reports/blastn_sample/job.yaml +0 -10
  196. data/spec/sample_reports/blastn_sample/stdout +0 -144
  197. data/spec/sample_reports/blastp_sample/job.yaml +0 -10
  198. data/spec/sample_reports/blastp_sample/stdout +0 -1187
  199. data/spec/sample_reports/blastx_sample/job.yaml +0 -10
  200. data/spec/sample_reports/blastx_sample/stdout +0 -1191
  201. data/spec/sample_reports/no_hits_sample/job.yaml +0 -10
  202. data/spec/sample_reports/no_hits_sample/stdout +0 -130
  203. data/spec/sample_reports/tblastn_sample/job.yaml +0 -10
  204. data/spec/sample_reports/tblastn_sample/stdout +0 -1107
  205. data/spec/sample_reports/tblastx_sample/job.yaml +0 -10
  206. data/spec/sample_reports/tblastx_sample/stdout +0 -2422
  207. data/spec/sample_reports/with_hits_sample/job.yaml +0 -10
  208. data/spec/sample_reports/with_hits_sample/stdout +0 -1489
@@ -18,7 +18,9 @@ module SequenceServer
18
18
  'The requested job could not be found'
19
19
  end
20
20
 
21
- undef_method :backtrace
21
+ def more_info
22
+ ''
23
+ end
22
24
  end
23
25
 
24
26
  # Errors caused due to incorrect user input.
@@ -1,12 +1,9 @@
1
1
  module SequenceServer
2
2
  # Define BLAST::Hit.
3
3
  module BLAST
4
- # Hit Object to store all the hits per Query. HSPs per hit should be sorted
5
- # in ascending order of evalue.
4
+ # Hit object to store all the hits per Query.
6
5
  Hit = Struct.new(:query, :number, :id, :accession, :title,
7
6
  :length, :sciname, :qcovs, :hsps) do
8
- include Links
9
-
10
7
  def initialize(*args)
11
8
  args[1] = args[1].to_i
12
9
  args[4] = '' if args[4] == 'No definition line'
@@ -16,29 +13,45 @@ module SequenceServer
16
13
  super
17
14
  end
18
15
 
19
- # Hit's score is the sum of score of all HSPs.
20
- def score
21
- hsps.map(&:score).reduce(:+)
16
+ # This gets called when #to_json is called on report object in routes. We
17
+ # cannot use the to_json method provided by Struct class because what we
18
+ # want to send to the browser differs from the attributes declared with
19
+ # Struct class. Some of these are derived data such as score, identity,
20
+ # and custom links, while some attributes are necessary for internal
21
+ # representation.
22
+ def to_json(*args)
23
+ # List all attributes that we want to send to the browser.
24
+ properties = %i[number id accession title length score identity
25
+ qcovs sciname evalue hsps links]
26
+ properties.inject({}) { |h, k| h[k] = send(k); h }.to_json(*args)
22
27
  end
23
28
 
24
- # Hit's identity is the sum of identity of all
25
- # HSPs divided by sum of length of all HSPs
26
- # (expressed as percentagge).
27
- def identity
28
- hsps.map(&:identity).reduce(:+) * 100 / hsps.map(&:length).reduce(:+)
29
- end
29
+ ###
30
+ # Link generator functionality.
31
+ ###
32
+
33
+ # Include the Links module.
34
+ include Links
30
35
 
36
+ # Links returns a list of Hashes that can be easily turned into an href
37
+ # in the client. These are derived by calling link generators, that is,
38
+ # instance methods of the Links module.
31
39
  def links
32
40
  links = Links.instance_methods.map { |m| send m }
33
41
  links.compact!
34
42
  links.sort_by { |link| [link[:order], link[:title]] }
35
43
  end
36
44
 
45
+ # Returns the database type (nucleotide or protein).
46
+ def dbtype
47
+ report.dbtype
48
+ end
49
+
37
50
  # Returns a list of databases that contain this hit.
38
51
  #
39
52
  # e.g., whichdb('SI_2.2.23') => [<Database: ...>, ...]
40
53
  def whichdb
41
- querydb.select { |db| db.include? id }
54
+ report.querydb.select { |db| db.include? id }
42
55
  end
43
56
 
44
57
  # Returns tuple of tuple indicating start and end coordinates of matched
@@ -52,26 +65,33 @@ module SequenceServer
52
65
  [[qstart_min, qend_max], [sstart_min, send_max]]
53
66
  end
54
67
 
55
- # NOTE: Evalue of a hit is meaningless. This is here for code that needs
56
- # minimum evalue of all HSPs.
57
- def evalue
58
- hsps.first.evalue
59
- end
68
+ ###
69
+ # Score, identity, and evalue attributes below are used in tabular summary
70
+ # of hits in the HTML report. At some point we should move these to the
71
+ # client.
72
+ ###
60
73
 
61
- def to_json(*args)
62
- %i[number id accession title length score identity qcovs
63
- sciname evalue hsps links].inject({}) { |h, k|
64
- h[k] = send(k)
65
- h
66
- }.to_json(*args)
74
+ # Returns the sum of scores of all HSPs.
75
+ def score
76
+ hsps.map(&:score).reduce(:+)
67
77
  end
68
78
 
69
- private
79
+ # Returns the sum of identity of all HSPs divided by sum of length of all
80
+ # HSPs (expressed as percentage).
81
+ def identity
82
+ hsps.map(&:identity).reduce(:+) * 100 / hsps.map(&:length).reduce(:+)
83
+ end
70
84
 
71
- def querydb
72
- report.querydb
85
+ # Returns the minimum evalue of all HSPs of the Hit. This is shown in the
86
+ # tabular overview of hits in the HTML report.
87
+ def evalue
88
+ hsps.first.evalue
73
89
  end
74
90
 
91
+ private
92
+
93
+ # Returns the report object that this hit is a part of. This is used to
94
+ # access list of databases etc.
75
95
  def report
76
96
  query.report
77
97
  end
@@ -6,13 +6,26 @@ module SequenceServer
6
6
  # Extends SequenceServer::Job to describe a BLAST job.
7
7
  class Job < Job
8
8
  def initialize(params)
9
- validate params
10
- super do
11
- @method = params[:method]
12
- @qfile = store('query.fa', params[:sequence])
13
- @databases = Database[params[:databases]]
14
- @options = params[:advanced].to_s.strip + defaults
15
- @advanced_params = parse_advanced params[:advanced]
9
+ if params.key?(:xml)
10
+ super do
11
+ @imported_xml_file = File.basename params[:xml]
12
+ # Copy over the XML file to job directory so that a job dir in
13
+ # itself is self-contained. This will help with tests among
14
+ # other things.
15
+ FileUtils.cp(params[:xml], dir)
16
+ @advanced_params = {}
17
+ @databases = []
18
+ done!
19
+ end
20
+ else
21
+ validate params
22
+ super do
23
+ @method = params[:method]
24
+ @qfile = store('query.fa', params[:sequence])
25
+ @databases = Database[params[:databases]]
26
+ @options = params[:advanced].to_s.strip + defaults
27
+ @advanced_params = parse_advanced params[:advanced]
28
+ end
16
29
  end
17
30
  end
18
31
 
@@ -22,6 +35,13 @@ module SequenceServer
22
35
  # Attributes used by us - should be considered private.
23
36
  attr_reader :method, :qfile, :databases, :options
24
37
 
38
+ # :nodoc:
39
+ # Returns path to the imported xml file if the job was created using the
40
+ # --import switch. Returns nil otherwise.
41
+ def imported_xml_file
42
+ File.join(dir, @imported_xml_file) if @imported_xml_file
43
+ end
44
+
25
45
  # Returns the command that will be executed. Job super class takes care
26
46
  # of actual execution.
27
47
  def command
@@ -146,7 +166,7 @@ module SequenceServer
146
166
  end
147
167
 
148
168
  def disallowed_options
149
- /-out|-html|-outfmt|-db|-query|-num_threads/i
169
+ /-out|-html|-outfmt|-db |-query|-num_threads/i
150
170
  end
151
171
  end
152
172
  end
@@ -29,18 +29,38 @@ module SequenceServer
29
29
  end
30
30
  end
31
31
 
32
- # :nodoc:
33
32
  # Attributes parsed out from XML output.
34
- attr_reader :program, :program_version
35
- attr_reader :queries, :querydb
36
- attr_reader :params, :stats
33
+ attr_reader :program, :program_version, :params, :stats, :queries
34
+
35
+ # This is obtained from the job object.
36
+ attr_reader :querydb
37
+
38
+ # Returns database type (nucleotide or protein) used for running BLAST
39
+ # search. If we ran the BLAST search, this information is available
40
+ # from Job#databases. For imported XML, this is inferred from
41
+ # Report#program (i.e., the BLAST algorithm)
42
+ def dbtype
43
+ return @dbtype if @dbtype
44
+ @dbtype = if @querydb.empty?
45
+ case program
46
+ when /blastn|tblastn|tblastx/
47
+ 'nucleotide'
48
+ when /blastp|blastx/
49
+ 'protein'
50
+ end
51
+ else
52
+ @querydb.first.type
53
+ end
54
+ end
37
55
 
38
56
  def to_json
39
57
  [:querydb, :program, :program_version, :params, :stats,
40
58
  :queries].inject({}) { |h, k|
41
59
  h[k] = send(k)
42
60
  h
43
- }.update(search_id: job.id, submitted_at: job.submitted_at.utc).to_json
61
+ }.update(search_id: job.id,
62
+ submitted_at: job.submitted_at.utc,
63
+ imported_xml: !!job.imported_xml_file).to_json
44
64
  end
45
65
 
46
66
  private
@@ -48,12 +68,25 @@ module SequenceServer
48
68
  # Generate report.
49
69
  def generate
50
70
  job.raise!
51
- xml_ir = parse_xml File.read(Formatter.run(job, 'xml').file)
52
- tsv_ir = parse_tsv File.read(Formatter.run(job, 'custom_tsv').file)
53
- extract_program_info xml_ir
54
- extract_params xml_ir
55
- extract_stats xml_ir
56
- extract_queries xml_ir, tsv_ir
71
+ if job.imported_xml_file
72
+ xml_ir = parse_xml File.read(job.imported_xml_file)
73
+ tsv_ir = Hash.new do |h1,k1|
74
+ h1[k1] = Hash.new do |h2,k2|
75
+ h2[k2]=['','',[]]
76
+ end
77
+ end
78
+ extract_program_info xml_ir
79
+ extract_params xml_ir
80
+ extract_stats xml_ir
81
+ extract_queries xml_ir, tsv_ir
82
+ else
83
+ xml_ir = parse_xml File.read(Formatter.run(job, 'xml').file)
84
+ tsv_ir = parse_tsv File.read(Formatter.run(job, 'custom_tsv').file )
85
+ extract_program_info xml_ir
86
+ extract_params xml_ir
87
+ extract_stats xml_ir
88
+ extract_queries xml_ir, tsv_ir
89
+ end
57
90
  end
58
91
 
59
92
  # Make program name and program name + version available via `program`
@@ -107,10 +140,14 @@ module SequenceServer
107
140
  def extract_hits(xml_ir, tsv_ir, query)
108
141
  return if xml_ir == ["\n"] # => No hits.
109
142
  xml_ir.each do |n|
110
- # If hit comes from a non -parse_seqids database, then
111
- # we assign id to accession and process hit defline to
112
- # obtain id and title.
113
- if n[1] =~ /^gnl\|/
143
+ # If hit comes from a non -parse_seqids database, then id (n[1]) is a
144
+ # BLAST assigned internal id of the format 'gnl|BL_ORD_ID|serial'. We
145
+ # assign the id to accession (because we use accession for sequence
146
+ # retrieval and this id is what blastdbcmd expects for non
147
+ # -parse_seqids databases) and parse the hit defline to
148
+ # obtain id and title ourselves (we use id and title
149
+ # for display purposes).
150
+ if n[1] =~ /^gnl\|BL_ORD_ID\|\d+/
114
151
  n[3] = n[1]
115
152
  defline = n[2].split
116
153
  n[1] = defline.shift
@@ -36,6 +36,19 @@ module SequenceServer
36
36
 
37
37
  attr_reader :id
38
38
 
39
+ def retrieve(accession, coords = nil)
40
+ cmd = "blastdbcmd -db #{name} -entry '#{accession}'"
41
+ if coords
42
+ cmd << " -range #{coords}"
43
+ end
44
+ out, = sys(cmd, path: config[:bin])
45
+ out.chomp
46
+ rescue CommandFailed
47
+ # Command failed because stdout was empty, meaning accession not
48
+ # present in this database.
49
+ nil
50
+ end
51
+
39
52
  def include?(accession)
40
53
  cmd = "blastdbcmd -entry '#{accession}' -db #{name}"
41
54
  out, = sys(cmd, path: config[:bin])
@@ -103,6 +116,64 @@ module SequenceServer
103
116
  collection.values.to_json
104
117
  end
105
118
 
119
+ # Retrieve given loci from the databases we have.
120
+ #
121
+ # loci to retrieve are specified as a String:
122
+ #
123
+ # "accession_1,accession_2:start-stop,accession_3"
124
+ #
125
+ # Return value is a FASTA format String containing sequences in the same
126
+ # order in which they were requested. If an accession could not be found,
127
+ # a commented out error message is included in place of the sequence.
128
+ # Sequences are retrieved from the first database in which the accession
129
+ # is found. The returned sequences can, thus, be incorrect if accessions
130
+ # are not unique across all databases (admins should make sure of that).
131
+ def retrieve(loci)
132
+ # Exit early if loci is nil.
133
+ return unless loci
134
+
135
+ # String -> Array
136
+ # We may have empty string if loci contains a double comma as a result
137
+ # of typo (remember - loci is external input). These are eliminated.
138
+ loci = loci.split(',').delete_if(&:empty?)
139
+
140
+ # Each database is searched for each locus. For each locus, search is
141
+ # terminated on the first database match.
142
+ # NOTE: This can return incorrect sequence if the sequence ids are
143
+ # not unique across all databases.
144
+ seqs = loci.map do |locus|
145
+ # Get sequence id and coords. coords may be nil. accession can't
146
+ # be.
147
+ accession, coords = locus.split(':')
148
+
149
+ # Initialise a variable to store retrieved sequence.
150
+ seq = nil
151
+
152
+ # Go over each database looking for this accession.
153
+ each do |database|
154
+ # Database lookup will return a string if given accession is
155
+ # present in the database, nil otherwise.
156
+ seq = database.retrieve(accession, coords)
157
+ # Found a match! Terminate iteration returning the retrieved
158
+ # sequence.
159
+ break if seq
160
+ end
161
+
162
+ # If accession was not present in any database, insert an error
163
+ # message in place of the sequence. The line starts with '#'
164
+ # and should be ignored by BLAST (not tested).
165
+ unless seq
166
+ seq = "# ERROR: #{locus} not found in any database"
167
+ end
168
+
169
+ # Return seq.
170
+ seq
171
+ end
172
+
173
+ # Array -> String
174
+ seqs.join("\n")
175
+ end
176
+
106
177
  # Intended to be used only for testing.
107
178
  def first
108
179
  all.first
@@ -61,21 +61,16 @@ module SequenceServer
61
61
  end
62
62
  end
63
63
 
64
- ## BLAST NOT INSTALLED OR NOT COMPATIBLE ##
64
+ ## BLAST NOT INSTALLED, NOT EXECUTABLE, OR NOT COMPATIBLE ##
65
65
 
66
66
  # Raised if SequenceServer could not locate NCBI BLAST+ installation on
67
67
  # user's system.
68
68
  class BLAST_NOT_INSTALLED_OR_NOT_EXECUTABLE < StandardError
69
69
  def to_s
70
- 'BLAST not installed, or is not executable.'
71
- end
72
- end
73
-
74
- # Raised if SequenceServer could not successfully execute 'blastp -version'
75
- # on user's system (see #141).
76
- class BLAST_NOT_EXECUTABLE < StandardError
77
- def to_s
78
- 'Error executing BLAST+ binaries.'
70
+ <<~MSG
71
+ BLAST+ is either not installed, or there is a problem with the
72
+ installed version.
73
+ MSG
79
74
  end
80
75
  end
81
76
 
@@ -25,7 +25,7 @@ module SequenceServer
25
25
  # Creates and queues a job. Returns created job object.
26
26
  def create(params)
27
27
  job = BLAST::Job.new(params) # TODO: Dynamic dispatch.
28
- SequenceServer.pool.queue { job.run }
28
+ pool.queue { job.run }
29
29
  job
30
30
  end
31
31
 
@@ -46,6 +46,13 @@ module SequenceServer
46
46
  Dir["#{DOTDIR}/**/job.yaml"]
47
47
  .map { |f| fetch File.basename File.dirname f }
48
48
  end
49
+
50
+ private
51
+
52
+ # Thread pool used for running BLAST searches.
53
+ def pool
54
+ @pool ||= Pool.new(SequenceServer.config[:num_threads])
55
+ end
49
56
  end
50
57
 
51
58
  include FileUtils
@@ -9,8 +9,10 @@ module SequenceServer
9
9
 
10
10
  NCBI_ID_PATTERN = /gi\|(\d+)\|/
11
11
  UNIPROT_ID_PATTERN = /sp\|(\w+)\|/
12
+ PFAM_ID_PATTERN = /(PF\d{5}\.?\d*)/
13
+ RFAM_ID_PATTERN = /(RF\d{5})/
12
14
 
13
- # Link generators return a Hash like below.
15
+ # Link generators are methods that return a Hash as defined below.
14
16
  #
15
17
  # {
16
18
  # # Required. Display title.
@@ -45,25 +47,29 @@ module SequenceServer
45
47
  # sequence_id = encode sequence_id
46
48
  # url = "http://www.ncbi.nlm.nih.gov/nucleotide/#{sequence_id}"
47
49
  #
48
- # querydb:
49
- # Returns an array of databases that were used for BLASTing.
50
+ # dbtype:
51
+ # Returns the database type (nucleotide or protein) that was used for
52
+ # BLAST search.
50
53
  #
51
54
  # whichdb:
52
- # Returns the database from which the given hit came from.
53
- #
54
- # e.g:
55
- #
56
- # hit_database = whichdb
57
- #
58
- # Examples:
59
- # ---------
60
- # See methods provided by default for an example implementation.
55
+ # Returns the databases from which the hit could have originated. To
56
+ # ensure that exactly one, correct database is returned, make sure that
57
+ # your sequence ids are unique across different FASTA files.
58
+ # NOTE: This method is slow.
59
+ #
60
+ # coordinates:
61
+ # Returns min alignment start and max alignment end coordinates for
62
+ # query and hit sequences.
63
+ #
64
+ # e.g.,
65
+ # query_coords = coordinates[0]
66
+ # hit_coords = coordinates[1]
61
67
 
62
68
  def ncbi
63
- return nil unless id.match(NCBI_ID_PATTERN)
69
+ return nil unless id.match(NCBI_ID_PATTERN) or title.match(NCBI_ID_PATTERN)
64
70
  ncbi_id = Regexp.last_match[1]
65
71
  ncbi_id = encode ncbi_id
66
- url = "http://www.ncbi.nlm.nih.gov/#{querydb.first.type}/#{ncbi_id}"
72
+ url = "https://www.ncbi.nlm.nih.gov/#{dbtype}/#{ncbi_id}"
67
73
  {
68
74
  order: 2,
69
75
  title: 'NCBI',
@@ -73,13 +79,39 @@ module SequenceServer
73
79
  end
74
80
 
75
81
  def uniprot
76
- return nil unless id.match(UNIPROT_ID_PATTERN)
82
+ return nil unless id.match(UNIPROT_ID_PATTERN) or title.match(UNIPROT_ID_PATTERN)
77
83
  uniprot_id = Regexp.last_match[1]
78
84
  uniprot_id = encode uniprot_id
79
- url = "http://www.uniprot.org/uniprot/#{uniprot_id}"
85
+ url = "https://www.uniprot.org/uniprot/#{uniprot_id}"
86
+ {
87
+ order: 2,
88
+ title: 'UniProt',
89
+ url: url,
90
+ icon: 'fa-external-link'
91
+ }
92
+ end
93
+
94
+ def pfam
95
+ return nil unless id.match(PFAM_ID_PATTERN) or title.match(PFAM_ID_PATTERN)
96
+ pfam_id = Regexp.last_match[1]
97
+ pfam_id = encode pfam_id
98
+ url = "https://pfam.xfam.org/family/#{pfam_id}"
99
+ {
100
+ order: 2,
101
+ title: 'Pfam',
102
+ url: url,
103
+ icon: 'fa-external-link'
104
+ }
105
+ end
106
+
107
+ def rfam
108
+ return nil unless id.match(RFAM_ID_PATTERN) or title.match(RFAM_ID_PATTERN)
109
+ rfam_id = Regexp.last_match[1]
110
+ rfam_id = encode rfam_id
111
+ url = "https://rfam.xfam.org/family/#{rfam_id}"
80
112
  {
81
113
  order: 2,
82
- title: 'Uniprot',
114
+ title: 'Rfam',
83
115
  url: url,
84
116
  icon: 'fa-external-link'
85
117
  }