bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,220 @@
1
+ #
2
+ # = bio/appl/blast/ncbioptions.rb - NCBI Tools-style options parser
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+ # == Description
10
+ #
11
+ # Bio::Blast::NCBIOptions is a class to parse and store NCBI Tools-style
12
+ # command-line options.
13
+ # It is internally used in Bio::Blast and some other classes.
14
+ #
15
+
16
+ require 'bio/appl/blast'
17
+ require 'shellwords'
18
+
19
+ class Bio::Blast
20
+
21
+ # A class to parse and store NCBI-tools style command-line options.
22
+ # It is internally used in Bio::Blast and some other classes.
23
+ #
24
+ class NCBIOptions
25
+
26
+ # creates a new object from an array
27
+ def initialize(options = [])
28
+ #@option_pairs = []
29
+ @option_pairs = _parse_options(options)
30
+ end
31
+
32
+ # (protected) option pairs. internal use only.
33
+ attr_reader :option_pairs
34
+ protected :option_pairs
35
+
36
+ # (private) parse options from given array
37
+ def _parse_options(options)
38
+ i = 0
39
+ pairs = []
40
+ while i < options.size
41
+ opt = options[i].to_s
42
+ if m = /\A(\-.)/.match(opt) then
43
+ key = m[1]
44
+ if m.post_match.empty? then
45
+ i += 1
46
+ val = options.fetch(i) rescue ''
47
+ else
48
+ val = m.post_match
49
+ end
50
+ pairs.push([ key, val ])
51
+ elsif '-' == opt then
52
+ pairs.push [ opt ]
53
+ else
54
+ #warn "Arguments must start with \'-\'" if $VERBOSE
55
+ pairs.push [ opt ]
56
+ end
57
+ i += 1
58
+ end
59
+ pairs
60
+ end
61
+ private :_parse_options
62
+
63
+ # Normalize options.
64
+ # For two or more same options (e.g. '-p blastn -p blastp'),
65
+ # only the last option is used. (e.g. '-p blastp' for above example).
66
+ #
67
+ # Note that completely illegal options are left untouched.
68
+ #
69
+ # ---
70
+ # *Returns*:: self
71
+ def normalize!
72
+ hash = {}
73
+ newpairs = []
74
+ @option_pairs.reverse_each do |pair|
75
+ if pair.size == 2 then
76
+ key = pair[0]
77
+ unless hash[key] then
78
+ newpairs.push pair
79
+ hash[key] = pair
80
+ end
81
+ else
82
+ newpairs.push pair
83
+ end
84
+ end
85
+ newpairs.reverse!
86
+ @option_pairs = newpairs
87
+ self
88
+ end
89
+
90
+ # current options as an array of strings
91
+ def options
92
+ @option_pairs.flatten
93
+ end
94
+
95
+ # parses a string and returns a new object
96
+ def self.parse(str)
97
+ options = Shellwords.shellwords(str)
98
+ self.new(options)
99
+ end
100
+
101
+ # (private) key string to regexp
102
+ def _key_to_regexp(key)
103
+ key = key.sub(/\A\-/, '')
104
+ Regexp.new('\A\-' + Regexp.escape(key) + '\z')
105
+ end
106
+ private :_key_to_regexp
107
+
108
+ # Return the option.
109
+ # ---
110
+ # *Arguments*:
111
+ # * _key_: option name as a string, e.g. 'm', 'p', or '-m', '-p'.
112
+ # *Returns*:: String or nil
113
+ def get(key)
114
+ re = _key_to_regexp(key)
115
+
116
+ # Note: the last one is used when the same option is given two or more times.
117
+ value = nil
118
+ @option_pairs.reverse_each do |pair|
119
+ if re =~ pair[0] then
120
+ value = pair[1]
121
+ break
122
+ end
123
+ end
124
+ return value
125
+ end
126
+
127
+ # Delete the given option.
128
+ # ---
129
+ # *Arguments*:
130
+ # * _key_: option name as a string, e.g. 'm', 'p', or '-m', '-p'.
131
+ # *Returns*:: String or nil
132
+ def delete(key)
133
+ re = _key_to_regexp(key)
134
+
135
+ # Note: the last option is used for return value
136
+ # when the same option is given two or more times.
137
+ oldvalue = nil
138
+ @option_pairs = @option_pairs.delete_if do |pair|
139
+ if re =~ pair[0] then
140
+ oldvalue = pair[1]
141
+ true
142
+ else
143
+ false
144
+ end
145
+ end
146
+ return oldvalue
147
+ end
148
+
149
+ # Sets the option to given value.
150
+ #
151
+ # For example, if you want to set '-p blastall' option,
152
+ # obj.set('p', 'blastall')
153
+ # or
154
+ # obj.set('-p', 'blastall')
155
+ # (above two are equivalent).
156
+ #
157
+ # ---
158
+ # *Arguments*:
159
+ # * _key_: option name as a string, e.g. 'm', 'p'.
160
+ # * _value_: value as a string, e.g. '7', 'blastp'.
161
+ # *Returns*:: previous value; String or nil
162
+ def set(key, value)
163
+ re = _key_to_regexp(key)
164
+ oldvalue = nil
165
+ flag = false
166
+ # Note: only the last occurrence is modified when the same option is given multiple times.
167
+ @option_pairs.reverse_each do |pair|
168
+ if re =~ pair[0] then
169
+ oldvalue = pair[1]
170
+ pair[1] = value
171
+ flag = true
172
+ break
173
+ end
174
+ end
175
+ unless flag then
176
+ key = "-#{key}" unless key[0, 1] == '-'
177
+ @option_pairs.push([ key, value ])
178
+ end
179
+ oldvalue
180
+ end
181
+
182
+ # Adds options from given array.
183
+ # Note that existing options will also be normalized.
184
+ # ---
185
+ # *Arguments*:
186
+ # * _options_: options as an Array of String objects.
187
+ # *Returns*:: self
188
+ def add_options(options)
189
+ @option_pairs.concat _parse_options(options)
190
+ self.normalize!
191
+ self
192
+ end
193
+
194
+ # If self == other, returns true. Otherwise, returns false.
195
+ def ==(other)
196
+ return true if super(other)
197
+ begin
198
+ oopts = other.options
199
+ rescue
200
+ return false
201
+ end
202
+ return self.options == oopts
203
+ end
204
+
205
+ # Returns an array for command-line options.
206
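+ # Options given in prior_options take precedence: they replace any stored
+ # options with the same key and are placed first in the returned array.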
207
+ def make_command_line_options(prior_options = [])
208
+ newopts = self.class.new(self.options)
209
+ #newopts.normalize!
210
+ prior_pairs = _parse_options(prior_options)
211
+ prior_pairs.each do |pair|
212
+ newopts.delete(pair[0])
213
+ end
214
+ newopts.option_pairs[0, 0] = prior_pairs
215
+ newopts.options
216
+ end
217
+
218
+ end #class NCBIOptions
219
+
220
+ end #class Bio::Blast
@@ -0,0 +1,106 @@
1
+ #
2
+ # = bio/appl/blast/remote.rb - remote BLAST wrapper basic module
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+
10
+ require 'bio/appl/blast'
11
+
12
+ class Bio::Blast
13
+
14
+ # Bio::Blast::Remote is a namespace for Remote Blast factory.
15
+ module Remote
16
+
17
+ autoload :GenomeNet, 'bio/appl/blast/genomenet'
18
+ autoload :Genomenet, 'bio/appl/blast/genomenet'
19
+
20
+ autoload :DDBJ, 'bio/appl/blast/ddbj'
21
+ autoload :Ddbj, 'bio/appl/blast/ddbj'
22
+
23
+ # creates a remote BLAST factory using GenomeNet
24
+ def self.genomenet(program, db, options = [])
25
+ GenomeNet.new(program, db, options)
26
+ #Bio::Blast.new(program, db, options, 'genomenet')
27
+ end
28
+
29
+ # creates a remote BLAST factory using DDBJ Web service
30
+ def self.ddbj(program, db, options = [])
31
+ DDBJ.new(program, db, options)
32
+ #Bio::Blast.new(program, db, options, 'ddbj')
33
+ end
34
+
35
+ # Common methods for meta-information processing
36
+ # (e.g. list of databases).
37
+ module Information
38
+
39
+ # (private) parses database information and stores data
40
+ def _parse_databases
41
+ raise NotImplementedError
42
+ end
43
+ private :_parse_databases
44
+
45
+ # Returns a list of available nucleotide databases.
46
+ #
47
+ # Note: see the note of databases method.
48
+ #
49
+ # ---
50
+ # *Returns*:: Array containing String objects
51
+ def nucleotide_databases
52
+ _parse_databases
53
+ @databases['blastn']
54
+ end
55
+
56
+ # Returns a list of available protein databases.
57
+ #
58
+ # Note: see the note of databases method.
59
+ # ---
60
+ # *Returns*:: Array containing String objects
61
+ def protein_databases
62
+ _parse_databases
63
+ @databases['blastp']
64
+ end
65
+
66
+ # Returns a list of available databases for given program.
67
+ #
68
+ # Note: It parses remote sites to obtain database information
69
+ # (e.g. http://blast.genome.jp/ for Bio::Blast::Remote::GenomeNet).
70
+ # Thus, if the site is changed, this method can not return correct data.
71
+ # Please tell BioRuby developers when the site is changed.
72
+ #
73
+ # ---
74
+ # *Arguments*:
75
+ # * _program_ (required): blast program('blastn', 'blastp', 'blastx', 'tblastn' or 'tblastx')
76
+ # *Returns*:: Array containing String objects
77
+ def databases(program)
78
+ _parse_databases
79
+ @databases[program] || []
80
+ end
81
+
82
+ # Returns a short description of given database.
83
+ #
84
+ # Note: see the note of databases method.
85
+ # ---
86
+ # *Arguments*:
87
+ # * _program_ (required): 'blastn', 'blastp', 'blastx', 'tblastn' or 'tblastx'
88
+ # * _db_ (required): database name
89
+ # *Returns*:: String
90
+ def database_description(program, db)
91
+ _parse_databases
92
+ h = @database_descriptions[program]
93
+ h ? (h[db] || '') : ''
94
+ end
95
+
96
+ # Resets data and clears cached data in this module.
97
+ def reset
98
+ @parse_databases = false
99
+ true
100
+ end
101
+ end #module Information
102
+
103
+ end #module Remote
104
+
105
+ end #class Bio::Blast
106
+
@@ -4,13 +4,14 @@
4
4
  # Copyright:: Copyright (C) 2003 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: report.rb,v 1.11 2007/04/05 23:35:39 trevor Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'bio/appl/blast'
11
11
  require 'bio/appl/blast/xmlparser'
12
12
  require 'bio/appl/blast/rexml'
13
13
  require 'bio/appl/blast/format8'
14
+ require 'bio/io/flatfile'
14
15
 
15
16
  module Bio
16
17
  class Blast
@@ -64,8 +65,10 @@ class Report
64
65
  if /<?xml/.match(data[/.*/])
65
66
  if defined?(XMLParser)
66
67
  xmlparser_parse(data)
68
+ @reports = blastxml_split_reports
67
69
  else
68
70
  rexml_parse(data)
71
+ @reports = blastxml_split_reports
69
72
  end
70
73
  else
71
74
  tab_parse(data)
@@ -81,10 +84,14 @@ class Report
81
84
  case parser
82
85
  when :xmlparser # format 7
83
86
  xmlparser_parse(data)
87
+ @reports = blastxml_split_reports
84
88
  when :rexml # format 7
85
89
  rexml_parse(data)
90
+ @reports = blastxml_split_reports
86
91
  when :tab # format 8
87
92
  tab_parse(data)
93
+ when false
94
+ # do not parse, creates an empty object
88
95
  else
89
96
  auto_parse(data)
90
97
  end
@@ -98,23 +105,45 @@ class Report
98
105
  # 'gap-open', 'gap-extend', 'filter'
99
106
  attr_reader :parameters
100
107
 
108
+ #--
101
109
  # Shortcut for BlastOutput values.
102
- attr_reader :program, :version, :reference, :db, :query_id, :query_def, :query_len
110
+ #++
111
+
112
+ # program name (e.g. "blastp") (String)
113
+ attr_reader :program
114
+
115
+ # BLAST version (e.g. "blastp 2.2.18 [Mar-02-2008]") (String)
116
+ attr_reader :version
117
+
118
+ # reference (String)
119
+ attr_reader :reference
120
+
121
+ # database name or title (String)
122
+ attr_reader :db
123
+
124
+ # query ID (String)
125
+ attr_reader :query_id
126
+
127
+ # query definition line (String)
128
+ attr_reader :query_def
129
+
130
+ # query length (Integer)
131
+ attr_reader :query_len
103
132
 
104
133
  # Matrix used (-M) : shortcuts for @parameters
105
134
  def matrix; @parameters['matrix']; end
106
135
  # Expectation threshold (-e) : shortcuts for @parameters
107
- def expect; @parameters['expect'].to_i; end
136
+ def expect; @parameters['expect']; end
108
137
  # Inclusion threshold (-h) : shortcuts for @parameters
109
- def inclusion; @parameters['include'].to_i; end
138
+ def inclusion; @parameters['include']; end
110
139
  # Match score for NT (-r) : shortcuts for @parameters
111
- def sc_match; @parameters['sc-match'].to_i; end
140
+ def sc_match; @parameters['sc-match']; end
112
141
  # Mismatch score for NT (-q) : shortcuts for @parameters
113
- def sc_mismatch; @parameters['sc-mismatch'].to_i; end
142
+ def sc_mismatch; @parameters['sc-mismatch']; end
114
143
  # Gap opening cost (-G) : shortcuts for @parameters
115
- def gap_open; @parameters['gap-open'].to_i; end
144
+ def gap_open; @parameters['gap-open']; end
116
145
  # Gap extension cost (-E) : shortcuts for @parameters
117
- def gap_extend; @parameters['gap-extend'].to_i; end
146
+ def gap_extend; @parameters['gap-extend']; end
118
147
  # Filtering options (-F) : shortcuts for @parameters
119
148
  def filter; @parameters['filter']; end
120
149
  # PHI-BLAST pattern : shortcuts for @parameters
@@ -204,7 +233,17 @@ class Report
204
233
  yield x
205
234
  end
206
235
  end
207
- end
236
+
237
+ # query ID, only available for new BLAST XML format
238
+ attr_accessor :query_id
239
+
240
+ # query definition, only available for new BLAST XML format
241
+ attr_accessor :query_def
242
+
243
+ # query length, only available for new BLAST XML format
244
+ attr_accessor :query_len
245
+
246
+ end #class Iteration
208
247
 
209
248
 
210
249
  # Bio::Blast::Report::Hit
@@ -323,8 +362,220 @@ class Report
323
362
  attr_accessor :mismatch_count
324
363
  end
325
364
 
365
+
366
+ # When the report contains results for multiple query sequences,
367
+ # returns an array of Bio::Blast::Report objects corresponding to
368
+ # the multiple queries.
369
+ # Otherwise, returns nil.
370
+ #
371
+ # Note for "No hits found":
372
+ # When no hits found for a query sequence, the result for the query
373
+ # is completely void and no information available in the result XML,
374
+ # including query ID and query definition.
375
+ # The only trace is that iteration number is skipped.
376
+ # This means that if the no-hit query is the last query,
377
+ # the query can not be detected, because the result XML is
378
+ # completely the same as the result XML without the query.
379
+ attr_reader :reports
380
+
381
+ private
382
+ # (private method)
383
+ # In new BLAST XML (blastall >= 2.2.14), results of multiple queries
384
+ # are stored in <Iteration>. This method splits iterations into
385
+ # multiple Bio::Blast::Report objects and returns them as an array.
386
+ def blastxml_split_reports
387
+ unless self.iterations.find { |iter|
388
+ iter.query_id || iter.query_def || iter.query_len
389
+ } then
390
+ # traditional BLAST XML format, or blastpgp result.
391
+ return nil
392
+ end
393
+
394
+ # new BLAST XML format (blastall 2.2.14 or later)
395
+ origin = self
396
+ reports = []
397
+ prev_iternum = 0
398
+ firsttime = true
399
+
400
+ orig_iters = self.iterations
401
+ orig_iters.each do |iter|
402
+ blast = self.class.new(nil, false)
403
+ # When no hits found, the iteration is skipped in NCBI BLAST XML.
404
+ # So, filled with empty report object.
405
+ if prev_iternum + 1 < iter.num then
406
+ ((prev_iternum + 1)...(iter.num)).each do |num|
407
+ empty_i = Iteration.new
408
+ empty_i.num = num
409
+ empty_i.instance_eval {
410
+ if firsttime then
411
+ @query_id = origin.query_id
412
+ @query_def = origin.query_def
413
+ @query_len = origin.query_len
414
+ firsttime = false
415
+ end
416
+ }
417
+ empty = self.class.new(nil, false)
418
+ empty.instance_eval {
419
+ # query_* are copied from the empty_i
420
+ @query_id = empty_i.query_id
421
+ @query_def = empty_i.query_def
422
+ @query_len = empty_i.query_len
423
+ # others are copied from the origin
424
+ @program = origin.program
425
+ @version = origin.version
426
+ @reference = origin.reference
427
+ @db = origin.db
428
+ @parameters.update(origin.parameters)
429
+ # the empty_i is added to the iterations
430
+ @iterations.push empty_i
431
+ }
432
+ reports.push empty
433
+ end
434
+ end
435
+
436
+ blast.instance_eval {
437
+ if firsttime then
438
+ @query_id = origin.query_id
439
+ @query_def = origin.query_def
440
+ @query_len = origin.query_len
441
+ firsttime = false
442
+ end
443
+ # query_* are copied from the iter
444
+ @query_id = iter.query_id if iter.query_id
445
+ @query_def = iter.query_def if iter.query_def
446
+ @query_len = iter.query_len if iter.query_len
447
+ # others are copied from the origin
448
+ @program = origin.program
449
+ @version = origin.version
450
+ @reference = origin.reference
451
+ @db = origin.db
452
+ @parameters.update(origin.parameters)
453
+ # rewrites hit's query_id, query_def, query_len
454
+ iter.hits.each do |h|
455
+ h.query_id = @query_id
456
+ h.query_def = @query_def
457
+ h.query_len = @query_len
458
+ end
459
+ # the iter is added to the iterations
460
+ @iterations.push iter
461
+ }
462
+
463
+ prev_iternum = iter.num
464
+ reports.push blast
465
+ end #orig_iters.each
466
+
467
+ # This object's iterations is set as first report's iterations
468
+ @iterations.clear
469
+ if rep = reports.first then
470
+ @iterations = rep.iterations
471
+ end
472
+
473
+ return reports
474
+ end
475
+
476
+ # Flatfile splitter for NCBI BLAST XML format.
477
+ # It is internally used when reading BLAST XML.
478
+ # Normally, users do not need to use it directly.
479
+ class BlastXmlSplitter < Bio::FlatFile::Splitter::Default
480
+
481
+ # creates a new splitter object
482
+ def initialize(klass, bstream)
483
+ super(klass, bstream)
484
+ @parsed_entries = []
485
+ @raw_unsupported = false
486
+ end
487
+
488
+ # rewinds
489
+ def rewind
490
+ ret = super
491
+ @parsed_entries.clear
492
+ @raw_unsupported = false
493
+ ret
494
+ end
495
+
496
+ # do nothing
497
+ def skip_leader
498
+ nil
499
+ end
500
+
501
+ # get an entry and return the entry as a string
502
+ def get_entry
503
+ if @parsed_entries.empty? then
504
+ @raw_unsupported = false
505
+ ent = super
506
+ prepare_parsed_entries(ent)
507
+ self.parsed_entry = @parsed_entries.shift
508
+ else
509
+ raise 'not supported for new BLAST XML format'
510
+ end
511
+ ent
512
+ end
513
+
514
+ # get an entry as a Bio::Blast::Report object
515
+ def get_parsed_entry
516
+ if @parsed_entries.empty? then
517
+ ent = get_entry
518
+ else
519
+ self.parsed_entry = @parsed_entries.shift
520
+ self.entry = nil
521
+ @raw_unsupported = true
522
+ end
523
+ self.parsed_entry
524
+ end
525
+
526
+ # current raw entry as a String
527
+ def entry
528
+ raise 'not supported for new BLAST XML format' if @raw_unsupported
529
+ super
530
+ end
531
+
532
+ # start position of the entry
533
+ def entry_start_pos
534
+ if entry_pos_flag then
535
+ raise 'not supported for new BLAST XML format' if @raw_unsupported
536
+ end
537
+ super
538
+ end
539
+
540
+ # (end position of the entry) + 1
541
+ def entry_ended_pos
542
+ if entry_pos_flag then
543
+ raise 'not supported for new BLAST XML format' if @raw_unsupported
544
+ end
545
+ super
546
+ end
547
+
548
+ private
549
+ # (private method) to prepare parsed entry
550
+ def prepare_parsed_entries(ent)
551
+ if ent then
552
+ blast = dbclass.new(ent)
553
+ if blast.reports and blast.reports.size >= 1 then
554
+ # new blast xml using <Iteration> for multiple queries
555
+ @parsed_entries.concat blast.reports
556
+ else
557
+ # traditional blast xml
558
+ @parsed_entries.push blast
559
+ end
560
+ end
561
+ end
562
+
563
+ end #class BlastXmlSplitter
564
+
565
+ # splitter for Bio::FlatFile support
566
+ FLATFILE_SPLITTER = BlastXmlSplitter
567
+
326
568
  end # Report
327
569
 
570
+ # NCBI BLAST tabular (-m 8) output parser.
571
+ # All methods are equal to Bio::Blast::Report.
572
+ # Only DELIMITER (and RS) is different.
573
+ #
574
+ class Report_tab < Report
575
+ # Delimiter of each entry. Bio::FlatFile uses it.
576
+ DELIMITER = RS = nil
577
+ end #class Report_tab
578
+
328
579
  end # Blast
329
580
  end # Bio
330
581