bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2002, 2003 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: rexml.rb,v 1.12 2007/04/05 23:35:39 trevor Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Note
10
10
  #
@@ -37,9 +37,7 @@ module Bio
37
37
  case name
38
38
  when 'BlastOutput_param'
39
39
  e.elements["Parameters"].each_element_with_text do |p|
40
- k = p.name.sub(/Parameters_/, '')
41
- v = p.text =~ /\D/ ? p.text : p.text.to_i
42
- @parameters[k] = v
40
+ xml_set_parameter(p.name, p.text)
43
41
  end
44
42
  else
45
43
  hash[name] = text if text.strip.size > 0
@@ -72,8 +70,17 @@ module Bio
72
70
  v = s.text =~ /\D/ ? s.text.to_f : s.text.to_i
73
71
  iteration.statistics[k] = v
74
72
  end
73
+
74
+ # for new BLAST XML format
75
+ when 'Iteration_query-ID'
76
+ iteration.query_id = i.text
77
+ when 'Iteration_query-def'
78
+ iteration.query_def = i.text
79
+ when 'Iteration_query-len'
80
+ iteration.query_len = i.text.to_i
75
81
  end
76
- end
82
+ end #case i.name
83
+
77
84
  return iteration
78
85
  end
79
86
 
@@ -0,0 +1,277 @@
1
+ #
2
+ # = bio/appl/blast/rpsblast.rb - NCBI RPS Blast default output parser
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+ # == Description
10
+ #
11
+ # NCBI RPS Blast (Reversed Position Specific Blast) default
12
+ # (-m 0 option) output parser class, Bio::Blast::RPSBlast::Report
13
+ # and related classes/modules.
14
+ #
15
+ # == References
16
+ #
17
+ # * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
18
+ # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
19
+ # "Gapped BLAST and PSI-BLAST: a new generation of protein database search
20
+ # programs", Nucleic Acids Res. 25:3389-3402.
21
+ # * ftp://ftp.ncbi.nih.gov/blast/documents/rpsblast.html
22
+ # * http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml
23
+ #
24
+
25
+ require 'bio/appl/blast/format0'
26
+ require 'bio/io/flatfile'
27
+
28
+ module Bio
29
+ class Blast
30
+
31
+ # NCBI RPS Blast (Reversed Position Specific Blast) namespace.
32
+ # Currently, this module exists only to provide a separate namespace.
33
+ # To parse RPSBlast results, see the Bio::Blast::RPSBlast::Report documentation.
34
+ module RPSBlast
35
+
36
# Flatfile splitter for RPS-BLAST reports.
# It is used internally when reading RPS-BLAST reports;
# normally, users do not need to use it directly.
#
# The stream is read in chunks terminated by a blank line (Delimiter),
# and a chunk starting with "RPS-BLAST" or "Query=" (see ReportHead)
# marks a boundary between reports.
#
# Note for Windows: RPS-BLAST results generated in Microsoft Windows
# may not be parsed correctly due to the line feed code problem.
# As a workaround, convert line feed codes from Windows(DOS) to UNIX.
#
class RPSBlastSplitter < Bio::FlatFile::Splitter::Template

  # Separator used to distinguish start of each report
  ReportHead = /\A\n*(RPS\-BLAST|Query\=)/

  # Delimiter used for IO#gets
  Delimiter = "\n\n"

  # Creates a new splitter object.
  # No report header has been remembered yet.
  def initialize(klass, bstream)
    super(klass, bstream)
    @entry_head = nil
  end

  # Skips leader of the entry.
  # In this class, only space characters are skipped. Always returns nil.
  def skip_leader
    stream.skip_spaces
    nil
  end

  # Rewinds the stream and forgets the remembered report header.
  def rewind
    @entry_head = nil
    super
  end

  # Gets an entry.
  #
  # Reads chunks until the next report boundary and stores the
  # concatenated text as the entry. A chunk beginning with "RPS-BLAST"
  # starts a new header; a chunk beginning with "Query=" starts a new
  # body. The header text seen before the first "Query=" is kept in
  # @entry_head and is prepended to later entries which have no header
  # of their own.
  #
  # Returns the entry (String), or nil when nothing could be read.
  def get_entry
    start_pos = stream_pos()
    chunks = []
    seen_header = false # an "RPS-BLAST" header chunk was read
    seen_query = false  # a "Query=" chunk was read

    while (chunk = stream.gets(Delimiter))
      if (found = ReportHead.match(chunk))
        marker = found[1]
        if marker == 'RPS-BLAST'
          unless chunks.empty?
            # header of the next report: push it back and stop here
            stream.ungets(chunk)
            break
          end
          @entry_head = nil
          seen_header = true
        elsif marker == 'Query='
          if seen_query
            # the next query's result: push it back and stop here
            stream.ungets(chunk)
            break
          end
          # everything read so far is the header shared by later queries
          @entry_head = chunks.join('') if seen_header
          seen_query = true
        else
          raise 'Bug: should not reach here'
        end
      end
      chunks.push chunk
    end
    end_pos = stream_pos()

    text = nil
    unless chunks.empty?
      text = chunks.join('')
      # re-attach the remembered header when this entry has none
      text = @entry_head + text if !seen_header and @entry_head
    end

    self.entry_start_pos = start_pos
    self.entry = text
    self.entry_ended_pos = end_pos
    return self.entry
  end
end #class RPSBlastSplitter
117
+
118
# NCBI RPS Blast (Reversed Position Specific Blast)
# default output parser.
#
# It supports default (-m 0 option) output of the "rpsblast" command.
#
# Because this class inherits Bio::Blast::Default::Report,
# almost all methods are equal to Bio::Blast::Default::Report.
# Only DELIMITER (and RS) and a few methods are different.
#
# By using Bio::FlatFile, (for example, Bio::FlatFile.open),
# an rpsblast result generated from multiple query sequences is
# automatically split into multiple
# Bio::Blast::RPSBlast::Report objects corresponding to
# query sequences.
#
# Note for multi-fasta results WITH using Bio::FlatFile:
# Each split result is concatenated with the header of the
# result which describes RPS-BLAST version and database
# information, if possible.
#
# Note for multi-fasta results WITHOUT using Bio::FlatFile:
# When parsing an output of rpsblast command running with
# multi-fasta sequences WITHOUT using Bio::FlatFile,
# each query's result is stored as an "iteration" of PSI-Blast.
# This behavior may be changed in the future.
#
# Note for nucleotide results: This class is not tested with
# nucleotide query and/or nucleotide databases.
#
class Report < Bio::Blast::Default::Report
  # Delimiter of each entry for RPS-BLAST.
  DELIMITER = RS = "\nRPS-BLAST"

  # (Integer) excess read size included in DELIMITER.
  DELIMITER_OVERRUN = 9 # "RPS-BLAST"

  # splitter for Bio::FlatFile support
  FLATFILE_SPLITTER = RPSBlastSplitter

  # Creates a new Report object from a string.
  #
  # Using Bio::FlatFile.open (or some other methods)
  # is recommended instead of using this method directly.
  # Refer to the Bio::Blast::RPSBlast::Report document for more
  # information.
  #
  # Leading whitespace is removed, and any text from a following
  # "RPS-BLAST" line onward is cut off and kept in @entry_overrun.
  # The remaining text is split at blank lines and handed to the
  # format0_split_* methods.
  #
  # Note for multi-fasta results WITHOUT using Bio::FlatFile:
  # When parsing an output of rpsblast command running with
  # multi-fasta sequences WITHOUT using Bio::FlatFile,
  # each query's result is stored as an "iteration" of PSI-Blast.
  # This behavior may be changed in the future.
  #
  # Note for nucleotide results: This class is not tested with
  # nucleotide query and/or nucleotide databases.
  #
  def initialize(str)
    str = str.sub(/\A\s+/, '')
    # remove trailing entries for sure
    str.sub!(/\n(RPS\-BLAST.*)/m, "\n")
    @entry_overrun = $1
    @entry = str
    data = str.split(/(?:^[ \t]*\n)+/)

    # A split entry may start directly with "Query=" (no header part).
    if data[0] and /\AQuery\=/ !~ data[0] then
      format0_split_headers(data)
    end
    @iterations = format0_split_search(data)
    format0_split_stat_params(data)
  end

  # Returns definition of the query.
  # For a result of multi-fasta input, the first query's definition
  # is returned (The same as <tt>iterations.first.query_def</tt>).
  # Returns nil when the report contains no iterations.
  def query_def
    itr = iterations.first
    itr ? itr.query_def : nil
  end

  # Returns length of the query.
  # For a result of multi-fasta input, the first query's length
  # is returned (The same as <tt>iterations.first.query_len</tt>).
  # Returns nil when the report contains no iterations.
  def query_len
    itr = iterations.first
    itr ? itr.query_len : nil
  end

  private

  # Splits headers into the first line, reference, query line and
  # database line.
  def format0_split_headers(data)
    @f0header = data.shift
    @f0references = []
    while data[0] and /\ADatabase\:/ !~ data[0]
      @f0references.push data.shift
    end
    @f0database = data.shift
    # In special case, a void line is inserted after database name.
    if /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\s*\z/ =~ data[0] then
      @f0database.concat "\n"
      @f0database.concat data.shift
    end
  end

  # Splits the search results.
  # Every block starting with "Query=" becomes one Iteration.
  # Returns an array of Iteration objects.
  def format0_split_search(data)
    iterations = []
    # Used in place of the "Searching...done" line when it is absent.
    dummystr = 'Searching..................................................done'
    if r = data[0] and /^Searching/ =~ r then
      dummystr = data.shift
    end
    while r = data[0] and /^Query\=/ =~ r
      iterations << Iteration.new(data, dummystr)
    end
    iterations
  end

  # Iteration class for RPS-Blast.
  # Though RPS-Blast does not iterate like PSI-BLAST,
  # it aims to store a result of single query sequence.
  #
  # Normally, the instance of the class is generated
  # by Bio::Blast::RPSBlast::Report object.
  #
  class Iteration < Bio::Blast::Default::Report::Iteration
    # Creates a new Iteration object.
    # It is designed to be called only internally from
    # the Bio::Blast::RPSBlast::Report class.
    # Users shall not use the method directly.
    #
    # When the first element of data starts with "Query=", it is
    # removed and parsed into the query definition and query length.
    # dummystr is then put at the head of data before it is passed
    # to the superclass.
    def initialize(data, dummystr)
      if /\AQuery\=/ =~ data[0] then
        sc = StringScanner.new(data.shift)
        sc.skip(/\s*/)
        if sc.skip_until(/Query\= */) then
          q = []
          r = nil
          loop do
            start = sc.pos
            q << sc.scan(/.*/)
            sc.skip(/\s*^ ?/)
            r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/)
            break if r
            # StringScanner#rest is never nil, so termination must be
            # detected explicitly: stop when the scanner cannot advance
            # (the "(N letters)" trailer is missing) or input is used up.
            if sc.pos == start then
              q.pop # discard the empty piece read without progress
              break
            end
            break if sc.eos?
          end
          @query_len = sc[1].delete(',').to_i if r
          @query_def = q.join(' ')
        end
      end
      data.unshift(dummystr)

      super(data)
    end

    # definition of the query
    attr_reader :query_def

    # length of the query sequence
    attr_reader :query_len

  end #class Iteration

end #class Report
272
+
273
+ end #module RPSBlast
274
+
275
+ end #module Blast
276
+ end #module Bio
277
+
@@ -1,10 +1,10 @@
1
1
  #
2
2
  # = bio/appl/blast/wublast.rb - WU-BLAST default output parser
3
3
  #
4
- # Copyright:: Copyright (C) 2003 GOTO Naohisa <ng@bioruby.org>
4
+ # Copyright:: Copyright (C) 2003, 2008 Naohisa GOTO <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: wublast.rb,v 1.12 2007/12/27 17:28:57 ngoto Exp $
7
+ # $Id:$
8
8
  #
9
9
  # == Description
10
10
  #
@@ -66,7 +66,94 @@ module Bio
66
66
  @notice
67
67
  end
68
68
 
69
# (WU-BLAST) Returns record number of the query.
# It may only be available for reports with multiple queries.
# Returns an Integer or nil.
def query_record_number
  # the query lines are parsed on demand
  format0_parse_query
  return @query_record_number
end
76
+
77
# (WU-BLAST) Returns exit code for the execution.
# Returns an Integer, or nil when no exit code has been recorded.
def exit_code
  defined?(@exit_code) ? @exit_code : nil
end
86
+
87
# (WU-BLAST) Returns the message bundled with the exit code output.
# The message will be shown when WU-BLAST ignores a fatal error
# due to the command line option "-nonnegok", "-novalidctxok",
# or "-shortqueryok".
#
# Returns a String, or nil when no message has been recorded.
def exit_code_message
  return nil unless defined?(@exit_code_message)
  @exit_code_message
end
100
+
101
# (WU-BLAST) Returns "NOTE:" information.
# Returns nil when none has been recorded,
# or an array containing String.
def notes
  return @notes if defined?(@notes)
  nil
end
110
+
111
# (WU-BLAST) Returns fatal error information.
# Returns nil when none has been recorded,
# or an array containing String.
def fatal_errors
  (defined? @fatal_errors) ? @fatal_errors : nil
end
120
+
121
# Returns the name (filename or title) of the database.
#
# The name is taken from the text following "Database:" in
# @f0database. A trailing "Searching... done" line and a trailing
# "N sequences; M total letters" line are dropped, and the remaining
# lines are joined with a single space. The result is cached in @db.
# Returns nil when no "Database:" text is found.
def db
  return @db if defined?(@db)
  return nil unless /Database *\: *(.*)/m =~ @f0database

  lines = $1.split(/^/)
  # Trailing lines that are not part of the name, in the order
  # they are checked and removed.
  trailers = [
    /\ASearching\..+ done\s*\z/,
    /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\.?\s*\z/
  ]
  trailers.each do |re|
    lines.pop if lines.size > 1 and re =~ lines[-1]
  end
  @db = lines.collect { |x| x.sub(/\s+\z/, '') }.join(' ')
end
137
+
69
138
  private
139
# Parses the query lines (begins with "Query = ") stored in @f0query.
#
# Sets @query_def to the definition lines joined with a single space.
# When the trailing "(N letters)" or "(N letters; record M)" part is
# found, also sets @query_len and, if a record number is present,
# @query_record_number.
#
# Nothing is done when @query_def is already defined, or when
# @f0query contains no "Query=".
def format0_parse_query
  unless defined?(@query_def)
    sc = StringScanner.new(@f0query)
    sc.skip(/\s*/)
    if sc.skip_until(/Query\= */) then
      q = []
      r = nil
      loop do
        start = sc.pos
        q << sc.scan(/.*/)
        sc.skip(/\s*^ ?/)
        r = sc.skip(/ *\( *([\,\d]+) *letters *(\; *record *([\,\d]+) *)?\)\s*\z/)
        break if r
        # StringScanner#rest is never nil, so termination must be
        # detected explicitly: stop when the scanner cannot advance
        # (the "(N letters)" trailer is missing) or input is used up.
        if sc.pos == start then
          q.pop # discard the empty piece read without progress
          break
        end
        break if sc.eos?
      end
      @query_len = sc[1].delete(',').to_i if r
      @query_record_number = sc[3].delete(',').to_i if r and sc[2]
      @query_def = q.join(' ')
    end
  end
end
156
+
70
157
  # Splits headers.
71
158
  def format0_split_headers(data)
72
159
  @f0header = data.shift
@@ -86,6 +173,19 @@ module Bio
86
173
  end
87
174
  end
88
175
  @f0query = data.shift
176
+ @f0warnings ||= []
177
+ while r = data.first
178
+ case r
179
+ when /^WARNING\: /
180
+ @f0warnings << data.shift
181
+ when /^NOTE\: /
182
+ @notes ||= []
183
+ @notes << data.shift
184
+ else
185
+ break #from the above "while"
186
+ end
187
+ end
188
+ return if r = data.first and /\A(Parameters\:|EXIT CODE *\d+)/ =~ r
89
189
  if r = data.first and !(/^Database\: / =~ r)
90
190
  @f0translate_info = data.shift
91
191
  end
@@ -94,23 +194,42 @@ module Bio
94
194
 
95
195
  # Splits search data.
96
196
  def format0_split_search(data)
197
+ @f0warnings ||= []
198
+ while r = data.first and r =~ /^WARNING\: /
199
+ @f0warnings << data.shift
200
+ end
97
201
  [ Iteration.new(data) ]
98
202
  end
99
203
 
100
204
  # Splits statistics parameters.
101
205
  def format0_split_stat_params(data)
102
- @f0warnings = []
103
- if r = data.first and r =~ /^WARNING\: / then
206
+ @f0warnings ||= []
207
+ while r = data.first and r =~ /^WARNING\: /
104
208
  @f0warnings << data.shift
105
209
  end
106
210
  @f0wu_params = []
107
211
  @f0wu_stats = []
108
- while r = data.shift and !(r =~ /^Statistics\:/)
109
- @f0wu_params << r
110
- end
111
- @f0wu_stats << r if r
112
- while r = data.shift
113
- @f0wu_stats << r
212
+ ary = @f0wu_params
213
+ while r = data.shift
214
+ case r
215
+ when /\AStatistics\:/
216
+ ary = @f0wu_stats
217
+ when /\AEXIT CODE *(\d+)\s*(.*)$/
218
+ @exit_code = $1.to_i
219
+ if $2 and !$2.empty? then
220
+ @exit_code_message = r.sub(/\AEXIT CODE *(\d+)\s*/, '')
221
+ end
222
+ r = nil
223
+ when /\AFATAL\: /
224
+ @fatal_errors ||= []
225
+ @fatal_errors.push r
226
+ r = nil
227
+ when /\AWARNING\: /
228
+ @f0warnings ||= []
229
+ @f0warnings << r
230
+ r = nil
231
+ end
232
+ ary << r if r
114
233
  end
115
234
  @f0dbstat = F0dbstat.new(@f0wu_stats)
116
235
  itr = @iterations[0]
@@ -205,8 +324,10 @@ module Bio
205
324
  @num = 1
206
325
  @f0message = []
207
326
  @f0warnings = []
208
- return unless r = data.shift
209
- @f0hitlist << r
327
+ return unless r = data.first
328
+ return if /\AParameters\:$/ =~ r
329
+ return if /\AEXIT CODE *\d+/ =~ r
330
+ @f0hitlist << data.shift
210
331
  return unless r = data.shift
211
332
  unless /\*{3} +NONE +\*{3}/ =~ r then
212
333
  @f0hitlist << r