bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2002, 2003 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: rexml.rb,v 1.12 2007/04/05 23:35:39 trevor Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Note
10
10
  #
@@ -37,9 +37,7 @@ module Bio
37
37
  case name
38
38
  when 'BlastOutput_param'
39
39
  e.elements["Parameters"].each_element_with_text do |p|
40
- k = p.name.sub(/Parameters_/, '')
41
- v = p.text =~ /\D/ ? p.text : p.text.to_i
42
- @parameters[k] = v
40
+ xml_set_parameter(p.name, p.text)
43
41
  end
44
42
  else
45
43
  hash[name] = text if text.strip.size > 0
@@ -72,8 +70,17 @@ module Bio
72
70
  v = s.text =~ /\D/ ? s.text.to_f : s.text.to_i
73
71
  iteration.statistics[k] = v
74
72
  end
73
+
74
+ # for new BLAST XML format
75
+ when 'Iteration_query-ID'
76
+ iteration.query_id = i.text
77
+ when 'Iteration_query-def'
78
+ iteration.query_def = i.text
79
+ when 'Iteration_query-len'
80
+ iteration.query_len = i.text.to_i
75
81
  end
76
- end
82
+ end #case i.name
83
+
77
84
  return iteration
78
85
  end
79
86
 
@@ -0,0 +1,277 @@
1
+ #
2
+ # = bio/appl/blast/rpsblast.rb - NCBI RPS Blast default output parser
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+ # == Description
10
+ #
11
+ # NCBI RPS Blast (Reversed Position Specific Blast) default
12
+ # (-m 0 option) output parser class, Bio::Blast::RPSBlast::Report
13
+ # and related classes/modules.
14
+ #
15
+ # == References
16
+ #
17
+ # * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
18
+ # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
19
+ # "Gapped BLAST and PSI-BLAST: a new generation of protein database search
20
+ # programs", Nucleic Acids Res. 25:3389-3402.
21
+ # * ftp://ftp.ncbi.nih.gov/blast/documents/rpsblast.html
22
+ # * http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml
23
+ #
24
+
25
+ require 'bio/appl/blast/format0'
26
+ require 'bio/io/flatfile'
27
+
28
+ module Bio
29
+ class Blast
30
+
31
+ # NCBI RPS Blast (Reversed Position Specific Blast) namespace.
32
+ # Currently, this module is existing only for separating namespace.
33
+ # To parse RPSBlast results, see Bio::Blast::RPSBlast::Report documents.
34
+ module RPSBlast
35
+
36
+ # Flatfile splitter for RPS-BLAST reports.
37
+ # It is internally used when reading RPS-BLAST report.
38
+ # Normally, users do not need to use it directly.
39
+ #
40
+ # Note for Windows: RPS-BLAST results generated in Microsoft Windows
41
+ # may not be parsed correctly due to the line feed code problem.
42
+ # For a workaround, convert line feed codes from Windows(DOS) to UNIX.
43
+ #
44
+ class RPSBlastSplitter < Bio::FlatFile::Splitter::Template
45
+
46
+ # Separator used to distinguish start of each report
47
+ ReportHead = /\A\n*(RPS\-BLAST|Query\=)/
48
+
49
+ # Delimiter used for IO#gets
50
+ Delimiter = "\n\n"
51
+
52
+ # creates a new splitter object
53
+ def initialize(klass, bstream)
54
+ super(klass, bstream)
55
+ @entry_head = nil
56
+ end
57
+
58
+ # Skips leader of the entry.
59
+ # In this class, only skips space characters.
60
+ def skip_leader
61
+ stream.skip_spaces
62
+ return nil
63
+ end
64
+
65
+ # Rewinds the stream
66
+ def rewind
67
+ @entry_head = nil
68
+ super
69
+ end
70
+
71
+ # gets an entry
72
+ def get_entry
73
+ p0 = stream_pos()
74
+ pieces = []
75
+ flag_head = false # reached to start of header
76
+ flag_body = false # reached to start of body (Query=...)
77
+ while x = stream.gets(Delimiter)
78
+ if ReportHead =~ x then
79
+ case $1
80
+ when 'RPS-BLAST'
81
+ if pieces.empty? then
82
+ @entry_head = nil
83
+ flag_head = true
84
+ else
85
+ stream.ungets(x)
86
+ break
87
+ end
88
+ when 'Query='
89
+ if flag_body then
90
+ stream.ungets(x)
91
+ break
92
+ else
93
+ @entry_head = pieces.join('') if flag_head
94
+ flag_body = true
95
+ end
96
+ else
97
+ raise 'Bug: should not reach here'
98
+ end
99
+ end #if ReportHead...
100
+ pieces.push x
101
+ end #while
102
+ p1 = stream_pos()
103
+
104
+ self.entry_start_pos = p0
105
+ self.entry =
106
+ if pieces.empty? then
107
+ nil
108
+ elsif !flag_head and @entry_head then
109
+ @entry_head + pieces.join('')
110
+ else
111
+ pieces.join('')
112
+ end
113
+ self.entry_ended_pos = p1
114
+ return self.entry
115
+ end
116
+ end #class RPSBlastSplitter
117
+
118
+ # NCBI RPS Blast (Reversed Position Specific Blast)
119
+ # default output parser.
120
+ #
121
+ # It supports default (-m 0 option) output of the "rpsblast" command.
122
+ #
123
+ # Because this class inherits Bio::Blast::Default::Report,
124
+ # almost all methods are the same as those of Bio::Blast::Default::Report.
125
+ # Only DELIMITER (and RS) and few methods are different.
126
+ #
127
+ # By using Bio::FlatFile, (for example, Bio::FlatFile.open),
128
+ # rpsblast result generated from multiple query sequences is
129
+ # automatically split into multiple
130
+ # Bio::Blast::RPSBlast::Report objects corresponding to
131
+ # query sequences.
132
+ #
133
+ # Note for multi-fasta results WITH using Bio::FlatFile:
134
+ # Each split result is concatenated with the header of the
135
+ # result which describes RPS-BLAST version and database
136
+ # information, if possible.
137
+ #
138
+ # Note for multi-fasta results WITHOUT using Bio::FlatFile:
139
+ # When parsing an output of rpsblast command running with
140
+ # multi-fasta sequences WITHOUT using Bio::FlatFile,
141
+ # each query's result is stored as an "iteration" of PSI-Blast.
142
+ # This behavior may be changed in the future.
143
+ #
144
+ # Note for nucleotide results: This class is not tested with
145
+ # nucleotide query and/or nucleotide databases.
146
+ #
147
+ class Report < Bio::Blast::Default::Report
148
+ # Delimiter of each entry for RPS-BLAST.
149
+ DELIMITER = RS = "\nRPS-BLAST"
150
+
151
+ # (Integer) excess read size included in DELIMITER.
152
+ DELIMITER_OVERRUN = 9 # "RPS-BLAST"
153
+
154
+ # splitter for Bio::FlatFile support
155
+ FLATFILE_SPLITTER = RPSBlastSplitter
156
+
157
+ # Creates a new Report object from a string.
158
+ #
159
+ # Using Bio::FlatFile.open (or some other methods)
160
+ # is recommended instead of using this method directly.
161
+ # Refer to the Bio::Blast::RPSBlast::Report documentation for more information.
162
+ #
163
+ # Note for multi-fasta results WITHOUT using Bio::FlatFile:
164
+ # When parsing an output of rpsblast command running with
165
+ # multi-fasta sequences WITHOUT using Bio::FlatFile,
166
+ # each query's result is stored as an "iteration" of PSI-Blast.
167
+ # This behavior may be changed in the future.
168
+ #
169
+ # Note for nucleotide results: This class is not tested with
170
+ # nucleotide query and/or nucleotide databases.
171
+ #
172
+ def initialize(str)
173
+ str = str.sub(/\A\s+/, '')
174
+ # remove trailing entries for sure
175
+ str.sub!(/\n(RPS\-BLAST.*)/m, "\n")
176
+ @entry_overrun = $1
177
+ @entry = str
178
+ data = str.split(/(?:^[ \t]*\n)+/)
179
+
180
+ if data[0] and /\AQuery\=/ !~ data[0] then
181
+ format0_split_headers(data)
182
+ end
183
+ @iterations = format0_split_search(data)
184
+ format0_split_stat_params(data)
185
+ end
186
+
187
+ # Returns definition of the query.
188
+ # For a result of multi-fasta input, the first query's definition
189
+ # is returned (The same as <tt>iterations.first.query_def</tt>).
190
+ def query_def
191
+ iterations.first.query_def
192
+ end
193
+
194
+ # Returns length of the query.
195
+ # For a result of multi-fasta input, the first query's length
196
+ # is returned (The same as <tt>iterations.first.query_len</tt>).
197
+ def query_len
198
+ iterations.first.query_len
199
+ end
200
+
201
+ private
202
+
203
+ # Splits headers into the first line, reference, query line and
204
+ # database line.
205
+ def format0_split_headers(data)
206
+ @f0header = data.shift
207
+ @f0references = []
208
+ while data[0] and /\ADatabase\:/ !~ data[0]
209
+ @f0references.push data.shift
210
+ end
211
+ @f0database = data.shift
212
+ # In special case, a void line is inserted after database name.
213
+ if /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\s*\z/ =~ data[0] then
214
+ @f0database.concat "\n"
215
+ @f0database.concat data.shift
216
+ end
217
+ end
218
+
219
+ # Splits the search results.
220
+ def format0_split_search(data)
221
+ iterations = []
222
+ dummystr = 'Searching..................................................done'
223
+ if r = data[0] and /^Searching/ =~ r then
224
+ dummystr = data.shift
225
+ end
226
+ while r = data[0] and /^Query\=/ =~ r
227
+ iterations << Iteration.new(data, dummystr)
228
+ end
229
+ iterations
230
+ end
231
+
232
+ # Iteration class for RPS-Blast.
233
+ # Though RPS-Blast does not iterate like PSI-BLAST,
234
+ # it aims to store a result of single query sequence.
235
+ #
236
+ # Normally, the instance of the class is generated
237
+ # by Bio::Blast::RPSBlast::Report object.
238
+ #
239
+ class Iteration < Bio::Blast::Default::Report::Iteration
240
+ # Creates a new Iteration object.
241
+ # It is designed to be called only internally from
242
+ # the Bio::Blast::RPSBlast::Report class.
243
+ # Users shall not use the method directly.
244
+ def initialize(data, dummystr)
245
+ if /\AQuery\=/ =~ data[0] then
246
+ sc = StringScanner.new(data.shift)
247
+ sc.skip(/\s*/)
248
+ if sc.skip_until(/Query\= */) then
249
+ q = []
250
+ begin
251
+ q << sc.scan(/.*/)
252
+ sc.skip(/\s*^ ?/)
253
+ end until !sc.rest or r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
254
+ @query_len = sc[1].delete(',').to_i if r
255
+ @query_def = q.join(' ')
256
+ end
257
+ end
258
+ data.unshift(dummystr)
259
+
260
+ super(data)
261
+ end
262
+
263
+ # definition of the query
264
+ attr_reader :query_def
265
+
266
+ # length of the query sequence
267
+ attr_reader :query_len
268
+
269
+ end #class Iteration
270
+
271
+ end #class Report
272
+
273
+ end #module RPSBlast
274
+
275
+ end #module Blast
276
+ end #module Bio
277
+
@@ -1,10 +1,10 @@
1
1
  #
2
2
  # = bio/appl/blast/wublast.rb - WU-BLAST default output parser
3
3
  #
4
- # Copyright:: Copyright (C) 2003 GOTO Naohisa <ng@bioruby.org>
4
+ # Copyright:: Copyright (C) 2003, 2008 Naohisa GOTO <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: wublast.rb,v 1.12 2007/12/27 17:28:57 ngoto Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Description
10
10
  #
@@ -66,7 +66,94 @@ module Bio
66
66
  @notice
67
67
  end
68
68
 
69
+ # (WU-BLAST) Returns record number of the query.
70
+ # It may only be available for reports with multiple queries.
71
+ # Returns an Integer or nil.
72
+ def query_record_number
73
+ format0_parse_query
74
+ @query_record_number
75
+ end
76
+
77
+ # (WU-BLAST) Returns exit code for the execution.
78
+ # Returns an Integer or nil.
79
+ def exit_code
80
+ if defined? @exit_code then
81
+ @exit_code
82
+ else
83
+ nil
84
+ end
85
+ end
86
+
87
+ # (WU-BLAST) Returns the message bundled with the exit code output.
88
+ # The message will be shown when WU-BLAST ignores a fatal error
89
+ # due to the command line option "-nonnegok", "-novalidctxok",
90
+ # or "-shortqueryok".
91
+ #
92
+ # Returns a String or nil.
93
+ def exit_code_message
94
+ if defined? @exit_code_message then
95
+ @exit_code_message
96
+ else
97
+ nil
98
+ end
99
+ end
100
+
101
+ # (WU-BLAST) Returns "NOTE:" information.
102
+ # Returns nil or an array containing String.
103
+ def notes
104
+ if defined? @notes then
105
+ @notes
106
+ else
107
+ nil
108
+ end
109
+ end
110
+
111
+ # (WU-BLAST) Returns fatal error information.
112
+ # Returns nil or an array containing String.
113
+ def fatal_errors
114
+ if defined? @fatal_errors then
115
+ @fatal_errors
116
+ else
117
+ nil
118
+ end
119
+ end
120
+
121
+ # Returns the name (filename or title) of the database.
122
+ def db
123
+ unless defined?(@db)
124
+ if /Database *\: *(.*)/m =~ @f0database then
125
+ a = $1.split(/^/)
126
+ if a.size > 1 and /\ASearching\..+ done\s*\z/ =~ a[-1] then
127
+ a.pop
128
+ end
129
+ if a.size > 1 and /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\.?\s*\z/ =~ a[-1] then
130
+ a.pop
131
+ end
132
+ @db = a.collect { |x| x.sub(/\s+\z/, '') }.join(' ')
133
+ end
134
+ end #unless
135
+ @db
136
+ end
137
+
69
138
  private
139
+ # Parses the query lines (begins with "Query = ").
140
+ def format0_parse_query
141
+ unless defined?(@query_def)
142
+ sc = StringScanner.new(@f0query)
143
+ sc.skip(/\s*/)
144
+ if sc.skip_until(/Query\= */) then
145
+ q = []
146
+ begin
147
+ q << sc.scan(/.*/)
148
+ sc.skip(/\s*^ ?/)
149
+ end until !sc.rest or r = sc.skip(/ *\( *([\,\d]+) *letters *(\; *record *([\,\d]+) *)?\)\s*\z/)
150
+ @query_len = sc[1].delete(',').to_i if r
151
+ @query_record_number = sc[3].delete(',').to_i if r and sc[2]
152
+ @query_def = q.join(' ')
153
+ end
154
+ end
155
+ end
156
+
70
157
  # Splits headers.
71
158
  def format0_split_headers(data)
72
159
  @f0header = data.shift
@@ -86,6 +173,19 @@ module Bio
86
173
  end
87
174
  end
88
175
  @f0query = data.shift
176
+ @f0warnings ||= []
177
+ while r = data.first
178
+ case r
179
+ when /^WARNING\: /
180
+ @f0warnings << data.shift
181
+ when /^NOTE\: /
182
+ @notes ||= []
183
+ @notes << data.shift
184
+ else
185
+ break #from the above "while"
186
+ end
187
+ end
188
+ return if r = data.first and /\A(Parameters\:|EXIT CODE *\d+)/ =~ r
89
189
  if r = data.first and !(/^Database\: / =~ r)
90
190
  @f0translate_info = data.shift
91
191
  end
@@ -94,23 +194,42 @@ module Bio
94
194
 
95
195
  # Splits search data.
96
196
  def format0_split_search(data)
197
+ @f0warnings ||= []
198
+ while r = data.first and r =~ /^WARNING\: /
199
+ @f0warnings << data.shift
200
+ end
97
201
  [ Iteration.new(data) ]
98
202
  end
99
203
 
100
204
  # Splits statistics parameters.
101
205
  def format0_split_stat_params(data)
102
- @f0warnings = []
103
- if r = data.first and r =~ /^WARNING\: / then
206
+ @f0warnings ||= []
207
+ while r = data.first and r =~ /^WARNING\: /
104
208
  @f0warnings << data.shift
105
209
  end
106
210
  @f0wu_params = []
107
211
  @f0wu_stats = []
108
- while r = data.shift and !(r =~ /^Statistics\:/)
109
- @f0wu_params << r
110
- end
111
- @f0wu_stats << r if r
112
- while r = data.shift
113
- @f0wu_stats << r
212
+ ary = @f0wu_params
213
+ while r = data.shift
214
+ case r
215
+ when /\AStatistics\:/
216
+ ary = @f0wu_stats
217
+ when /\AEXIT CODE *(\d+)\s*(.*)$/
218
+ @exit_code = $1.to_i
219
+ if $2 and !$2.empty? then
220
+ @exit_code_message = r.sub(/\AEXIT CODE *(\d+)\s*/, '')
221
+ end
222
+ r = nil
223
+ when /\AFATAL\: /
224
+ @fatal_errors ||= []
225
+ @fatal_errors.push r
226
+ r = nil
227
+ when /\AWARNING\: /
228
+ @f0warnings ||= []
229
+ @f0warnings << r
230
+ r = nil
231
+ end
232
+ ary << r if r
114
233
  end
115
234
  @f0dbstat = F0dbstat.new(@f0wu_stats)
116
235
  itr = @iterations[0]
@@ -205,8 +324,10 @@ module Bio
205
324
  @num = 1
206
325
  @f0message = []
207
326
  @f0warnings = []
208
- return unless r = data.shift
209
- @f0hitlist << r
327
+ return unless r = data.first
328
+ return if /\AParameters\:$/ =~ r
329
+ return if /\AEXIT CODE *\d+/ =~ r
330
+ @f0hitlist << data.shift
210
331
  return unless r = data.shift
211
332
  unless /\*{3} +NONE +\*{3}/ =~ r then
212
333
  @f0hitlist << r