bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,350 @@
1
+ #
2
+ # = bio/appl/bl2seq/report.rb - bl2seq (BLAST 2 sequences) parser
3
+ #
4
+ # Copyright:: Copyright (C) 2005 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.6 2005/12/18 15:58:39 k Exp $
24
+ #
25
+ # Bio::Blast::Bl2seq::Report is an NCBI bl2seq (BLAST 2 sequences) output parser.
26
+ #
27
+ # = Acknowledgements
28
+ #
29
+ # Thanks to Tomoaki NISHIYAMA <tomoakin __at__ kenroku.kanazawa-u.ac.jp>
30
+ # for providing bl2seq parser patches based on
31
+ # lib/bio/appl/blast/format0.rb.
32
+ #
33
+
34
+ require 'bio/appl/blast/format0'
35
+
36
+ module Bio
37
+ class Blast
38
+
39
+ class Bl2seq
40
+
41
+ # Bio::Blast::Bl2seq::Report is an NCBI bl2seq (BLAST 2 sequences) output parser.
42
+ # It inherits Bio::Blast::Default::Report.
43
+ # Most of its methods are the same as Bio::Blast::Default::Report,
44
+ # but it lacks many methods.
45
+ class Report < Bio::Blast::Default::Report
46
+
47
+ # Delimiter of each entry. Bio::FlatFile uses it.
48
+ # In Bio::Blast::Bl2seq::Report, it is nil (1 entry per file).
49
+ DELIMITER = RS = nil
50
+
51
+ undef format0_parse_header
52
+ undef program, version, version_number, version_date,
53
+ message, converged?, reference, db
54
+
55
+ # Splits headers.
56
+ def format0_split_headers(data)
57
+ @f0query = data.shift
58
+ end
59
+ private :format0_split_headers
60
+
61
+ # Splits the search results.
62
+ def format0_split_search(data)
63
+ iterations = []
64
+ while r = data[0] and /^\>/ =~ r
65
+ iterations << Iteration.new(data)
66
+ end
67
+ if iterations.size <= 0 then
68
+ iterations << Iteration.new(data)
69
+ end
70
+ iterations
71
+ end
72
+ private :format0_split_search
73
+
74
+ # Stores format0 database statistics.
75
+ # Internal use only. Users must not use the class.
76
+ class F0dbstat < Bio::Blast::Default::Report::F0dbstat #:nodoc:
77
+ # Returns number of sequences in database.
78
+ def db_num
79
+ unless defined?(@db_num)
80
+ parse_params
81
+ @db_num = @hash['Number of Sequences'].to_i
82
+ end
83
+ @db_num
84
+ end
85
+
86
+ # Returns number of letters in database.
87
+ def db_len
88
+ unless defined?(@db_len)
89
+ parse_params
90
+ @db_len = @hash['length of database'].to_i
91
+ end
92
+ @db_len
93
+ end
94
+ end #class F0dbstat
95
+
96
+ # Bio::Bl2seq::Report::Iteration stores information about
97
+ # an iteration.
98
+ # Normally, it may contain some Bio::Bl2seq::Report::Hit objects.
99
+ #
100
+ # Note that its main reason for existence is to keep compatibility
101
+ # with the Bio::Blast::Default::Report::* classes.
102
+ class Iteration < Bio::Blast::Default::Report::Iteration
103
+ # Creates a new Iteration object.
104
+ # It is designed to be called only internally from
105
+ # the Bio::Blast::Default::Report class.
106
+ # Users shall not use the method directly.
107
+ def initialize(data)
108
+ @f0stat = []
109
+ @f0dbstat = Bio::Blast::Default::Report::AlwaysNil.instance
110
+ @hits = []
111
+ @num = 1
112
+ while r = data[0] and /^\>/ =~ r
113
+ @hits << Hit.new(data)
114
+ end
115
+ end
116
+
117
+ # Returns the hits of the iteration.
118
+ # It returns an array of Bio::Bl2seq::Report::Hit objects.
119
+ def hits; @hits; end
120
+
121
+ undef message, pattern_in_database,
122
+ pattern, pattern_positions, hits_found_again,
123
+ hits_newly_found, hits_for_pattern, parse_hitlist,
124
+ converged?
125
+ end #class Iteration
126
+
127
+ # Bio::Bl2seq::Report::Hit contains information about a hit.
128
+ # It may contain some Bio::Blast::Default::Report::HSP objects.
129
+ # All methods are the same as Bio::Blast::Default::Report::Hit class.
130
+ # Please refer to Bio::Blast::Default::Report::Hit.
131
+ class Hit < Bio::Blast::Default::Report::Hit
132
+ end #class Hit
133
+
134
+ # Bio::Bl2seq::Report::HSP holds information about the hsp
135
+ # (high-scoring segment pair).
136
+ # NOTE that the HSP class below is NOT used because
137
+ # Ruby's constant namespaces are normally statically determined
138
+ # and HSP object is created in Bio::Blast::Default::Report::Hit class.
139
+ # Please refer to Bio::Blast::Default::Report::HSP.
140
+ class HSP < Bio::Blast::Default::Report::HSP
141
+ end #class HSP
142
+
143
+ end #class Report
144
+ end #class Bl2seq
145
+
146
+ end #class Blast
147
+ end #module Bio
148
+
149
+ ######################################################################
150
+
151
+ if __FILE__ == $0
152
+
153
+ Bio::FlatFile.open(Bio::Blast::Bl2seq::Report, ARGF) do |ff|
154
+ ff.each do |rep|
155
+
156
+ print "# === Bio::Blast::Bl2seq::Report\n"
157
+ puts
158
+ #@#print " rep.program #=> "; p rep.program
159
+ #@#print " rep.version #=> "; p rep.version
160
+ #@#print " rep.reference #=> "; p rep.reference
161
+ #@#print " rep.db #=> "; p rep.db
162
+ #print " rep.query_id #=> "; p rep.query_id
163
+ print " rep.query_def #=> "; p rep.query_def
164
+ print " rep.query_len #=> "; p rep.query_len
165
+ #puts
166
+ #@#print " rep.version_number #=> "; p rep.version_number
167
+ #@#print " rep.version_date #=> "; p rep.version_date
168
+ puts
169
+
170
+ print "# === Parameters\n"
171
+ #puts
172
+ #print " rep.parameters #=> "; p rep.parameters
173
+ puts
174
+ print " rep.matrix #=> "; p rep.matrix
175
+ print " rep.expect #=> "; p rep.expect
176
+ #print " rep.inclusion #=> "; p rep.inclusion
177
+ print " rep.sc_match #=> "; p rep.sc_match
178
+ print " rep.sc_mismatch #=> "; p rep.sc_mismatch
179
+ print " rep.gap_open #=> "; p rep.gap_open
180
+ print " rep.gap_extend #=> "; p rep.gap_extend
181
+ #print " rep.filter #=> "; p rep.filter
182
+ #@#print " rep.pattern #=> "; p rep.pattern
183
+ #print " rep.entrez_query #=> "; p rep.entrez_query
184
+ #puts
185
+ #@#print " rep.pattern_positions #=> "; p rep.pattern_positions
186
+ puts
187
+
188
+ print "# === Statistics (last iteration's)\n"
189
+ #puts
190
+ #print " rep.statistics #=> "; p rep.statistics
191
+ puts
192
+ print " rep.db_num #=> "; p rep.db_num
193
+ print " rep.db_len #=> "; p rep.db_len
194
+ #print " rep.hsp_len #=> "; p rep.hsp_len
195
+ print " rep.eff_space #=> "; p rep.eff_space
196
+ print " rep.kappa #=> "; p rep.kappa
197
+ print " rep.lambda #=> "; p rep.lambda
198
+ print " rep.entropy #=> "; p rep.entropy
199
+ puts
200
+ print " rep.num_hits #=> "; p rep.num_hits
201
+ print " rep.gapped_kappa #=> "; p rep.gapped_kappa
202
+ print " rep.gapped_lambda #=> "; p rep.gapped_lambda
203
+ print " rep.gapped_entropy #=> "; p rep.gapped_entropy
204
+ print " rep.posted_date #=> "; p rep.posted_date
205
+ puts
206
+
207
+ #@#print "# === Message (last iteration's)\n"
208
+ #@#puts
209
+ #@#print " rep.message #=> "; p rep.message
210
+ #puts
211
+ #@#print " rep.converged? #=> "; p rep.converged?
212
+ #@#puts
213
+
214
+ print "# === Iterations\n"
215
+ puts
216
+ print " rep.itrerations.each do |itr|\n"
217
+ puts
218
+
219
+ rep.iterations.each do |itr|
220
+
221
+ print "# --- Bio::Blast::Bl2seq::Report::Iteration\n"
222
+ puts
223
+
224
+ print " itr.num #=> "; p itr.num
225
+ #print " itr.statistics #=> "; p itr.statistics
226
+ #@#print " itr.message #=> "; p itr.message
227
+ print " itr.hits.size #=> "; p itr.hits.size
228
+ #puts
229
+ #@#print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size;
230
+ #@#print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size;
231
+ #@#if itr.hits_for_pattern then
232
+ #@#itr.hits_for_pattern.each_with_index do |hp, hpi|
233
+ #@#print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size;
234
+ #@#end
235
+ #@#end
236
+ #@#print " itr.converged? #=> "; p itr.converged?
237
+ puts
238
+
239
+ print " itr.hits.each do |hit|\n"
240
+ puts
241
+
242
+ itr.hits.each_with_index do |hit, i|
243
+
244
+ print "# --- Bio::Blast::Bl2seq::Default::Report::Hit"
245
+ print " ([#{i}])\n"
246
+ puts
247
+
248
+ #print " hit.num #=> "; p hit.num
249
+ #print " hit.hit_id #=> "; p hit.hit_id
250
+ print " hit.len #=> "; p hit.len
251
+ print " hit.definition #=> "; p hit.definition
252
+ #print " hit.accession #=> "; p hit.accession
253
+ #puts
254
+ print " hit.found_again? #=> "; p hit.found_again?
255
+
256
+ print " --- compatible/shortcut ---\n"
257
+ #print " hit.query_id #=> "; p hit.query_id
258
+ #print " hit.query_def #=> "; p hit.query_def
259
+ #print " hit.query_len #=> "; p hit.query_len
260
+ #print " hit.target_id #=> "; p hit.target_id
261
+ print " hit.target_def #=> "; p hit.target_def
262
+ print " hit.target_len #=> "; p hit.target_len
263
+
264
+ print " --- first HSP's values (shortcut) ---\n"
265
+ print " hit.evalue #=> "; p hit.evalue
266
+ print " hit.bit_score #=> "; p hit.bit_score
267
+ print " hit.identity #=> "; p hit.identity
268
+ #print " hit.overlap #=> "; p hit.overlap
269
+
270
+ print " hit.query_seq #=> "; p hit.query_seq
271
+ print " hit.midline #=> "; p hit.midline
272
+ print " hit.target_seq #=> "; p hit.target_seq
273
+
274
+ print " hit.query_start #=> "; p hit.query_start
275
+ print " hit.query_end #=> "; p hit.query_end
276
+ print " hit.target_start #=> "; p hit.target_start
277
+ print " hit.target_end #=> "; p hit.target_end
278
+ print " hit.lap_at #=> "; p hit.lap_at
279
+ print " --- first HSP's vaules (shortcut) ---\n"
280
+ print " --- compatible/shortcut ---\n"
281
+
282
+ puts
283
+ print " hit.hsps.size #=> "; p hit.hsps.size
284
+ if hit.hsps.size == 0 then
285
+ puts " (HSP not found: please see blastall's -b and -v options)"
286
+ puts
287
+ else
288
+
289
+ puts
290
+ print " hit.hsps.each do |hsp|\n"
291
+ puts
292
+
293
+ hit.hsps.each_with_index do |hsp, j|
294
+
295
+ print "# --- Bio::Blast::Default::Report::HSP (Bio::Blast::Bl2seq::Report::HSP)"
296
+ print " ([#{j}])\n"
297
+ puts
298
+ #print " hsp.num #=> "; p hsp.num
299
+ print " hsp.bit_score #=> "; p hsp.bit_score
300
+ print " hsp.score #=> "; p hsp.score
301
+ print " hsp.evalue #=> "; p hsp.evalue
302
+ print " hsp.identity #=> "; p hsp.identity
303
+ print " hsp.gaps #=> "; p hsp.gaps
304
+ print " hsp.positive #=> "; p hsp.positive
305
+ print " hsp.align_len #=> "; p hsp.align_len
306
+ #print " hsp.density #=> "; p hsp.density
307
+
308
+ print " hsp.query_frame #=> "; p hsp.query_frame
309
+ print " hsp.query_from #=> "; p hsp.query_from
310
+ print " hsp.query_to #=> "; p hsp.query_to
311
+
312
+ print " hsp.hit_frame #=> "; p hsp.hit_frame
313
+ print " hsp.hit_from #=> "; p hsp.hit_from
314
+ print " hsp.hit_to #=> "; p hsp.hit_to
315
+
316
+ #print " hsp.pattern_from#=> "; p hsp.pattern_from
317
+ #print " hsp.pattern_to #=> "; p hsp.pattern_to
318
+
319
+ print " hsp.qseq #=> "; p hsp.qseq
320
+ print " hsp.midline #=> "; p hsp.midline
321
+ print " hsp.hseq #=> "; p hsp.hseq
322
+ puts
323
+ print " hsp.percent_identity #=> "; p hsp.percent_identity
324
+ #print " hsp.mismatch_count #=> "; p hsp.mismatch_count
325
+ #
326
+ print " hsp.query_strand #=> "; p hsp.query_strand
327
+ print " hsp.hit_strand #=> "; p hsp.hit_strand
328
+ print " hsp.percent_positive #=> "; p hsp.percent_positive
329
+ print " hsp.percent_gaps #=> "; p hsp.percent_gaps
330
+ puts
331
+
332
+ end #each
333
+ end #if hit.hsps.size == 0
334
+ end
335
+ end
336
+ end #ff.each
337
+ end #FlatFile.open
338
+
339
+ end #if __FILE__ == $0
340
+
341
+ ######################################################################
342
+
343
+ =begin
344
+
345
+ = Bio::Blast::Bl2seq::Report
346
+
347
+ NCBI bl2seq (BLAST 2 sequences) output parser
348
+
349
+ =end
350
+
@@ -0,0 +1,269 @@
1
+ #
2
+ # bio/appl/blast.rb - BLAST wrapper
3
+ #
4
+ # Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright (C) 2002,2003 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: blast.rb,v 1.27 2005/12/18 17:28:55 nakao Exp $
22
+ #
23
+
24
+ require 'net/http'
25
+ require 'cgi' unless defined?(CGI)
26
+ require 'bio/command'
27
+ require 'shellwords'
28
+
29
+ module Bio
30
+
31
+ class Blast
32
+
33
+ autoload :Fastacmd, 'bio/io/fastacmd'
34
+ autoload :Report, 'bio/appl/blast/report'
35
+ autoload :Default, 'bio/appl/blast/format0'
36
+ autoload :WU, 'bio/appl/blast/wublast'
37
+ autoload :Bl2seq, 'bio/appl/bl2seq/report'
38
+
39
+ include Bio::Command::Tools
40
+
41
+ def initialize(program, db, opt = [], server = 'local')
42
+ @program = program
43
+ @db = db
44
+ @server = server
45
+
46
+ @blastall = 'blastall'
47
+ @matrix = nil
48
+ @filter = nil
49
+
50
+ @output = ''
51
+ @parser = nil
52
+
53
+ begin
54
+ a = opt.to_ary
55
+ rescue NameError #NoMethodError
56
+ # backward compatibility
57
+ a = Shellwords.shellwords(opt)
58
+ end
59
+ unless a.find { |x| /\A\-m/ =~ x.to_s } then
60
+ if defined?(XMLParser) or defined?(REXML)
61
+ @format = 7
62
+ else
63
+ @format = 8
64
+ end
65
+ end
66
+ @options = [ *a ]
67
+ end
68
+ attr_accessor :program, :db, :options, :server, :blastall, :matrix, :filter
69
+ attr_reader :output, :format
70
+ attr_writer :parser # to change :xmlparser, :rexml, :tab
71
+
72
+ def self.local(program, db, option = '')
73
+ self.new(program, db, option, 'local')
74
+ end
75
+
76
+ def self.remote(program, db, option = '', server = 'genomenet')
77
+ self.new(program, db, option, server)
78
+ end
79
+
80
+ def query(query)
81
+ return self.send("exec_#{@server}", query.to_s)
82
+ end
83
+
84
+ def option
85
+ # backward compatibility
86
+ make_command_line(@options)
87
+ end
88
+
89
+ def option=(str)
90
+ # backward compatibility
91
+ @options = Shellwords.shellwords(str)
92
+ end
93
+
94
+ # The method Bio::Blast.reports was moved here from bio/appl/blast/report.rb.
95
+ # only for xml format
96
+ def self.reports(input, parser = nil)
97
+ ary = []
98
+ input.each("</BlastOutput>\n") do |xml|
99
+ xml.sub!(/[^<]*(<?)/, '\1') # skip before <?xml> tag
100
+ next if xml.empty? # skip trailing no hits
101
+ if block_given?
102
+ yield Report.new(xml, parser)
103
+ else
104
+ ary << Report.new(xml, parser)
105
+ end
106
+ end
107
+ return ary
108
+ end
109
+
110
+
111
+ private
112
+
113
+
114
+ def parse_result(data)
115
+ Report.new(data, @parser)
116
+ end
117
+
118
+
119
+ def exec_local(query)
120
+ cmd = [ @blastall, '-p', @program, '-d', @db ]
121
+ cmd.concat([ '-M', @matrix ]) if @matrix
122
+ cmd.concat([ '-F', @filter ]) if @filter
123
+ cmd.concat([ '-m', @format.to_s ]) if @format
124
+ cmd.concat(@options) if @options
125
+
126
+ report = nil
127
+
128
+ @output = call_command_local(cmd, query)
129
+ report = parse_result(@output)
130
+
131
+ return report
132
+ end
133
+
134
+
135
+ def exec_genomenet(query)
136
+ host = "blast.genome.jp"
137
+ #path = "/sit-bin/nph-blast"
138
+ path = "/sit-bin/blast" #2005.08.12
139
+
140
+ matrix = @matrix ? @matrix : 'blosum62'
141
+ filter = @filter ? @filter : 'T'
142
+
143
+ opt = []
144
+ opt.concat([ '-m', @format.to_s ]) if @format
145
+ opt.concat(@options) if @options
146
+
147
+ form = {
148
+ 'style' => 'raw',
149
+ 'prog' => @program,
150
+ 'dbname' => @db,
151
+ 'sequence' => CGI.escape(query),
152
+ 'other_param' => CGI.escape(make_command_line_unix(opt)),
153
+ 'matrix' => matrix,
154
+ 'filter' => filter,
155
+ 'V_value' => 500, # default value for GenomeNet
156
+ 'B_value' => 250, # default value for GenomeNet
157
+ 'alignment_view' => 0,
158
+ }
159
+
160
+ data = []
161
+
162
+ form.each do |k, v|
163
+ data.push("#{k}=#{v}") if v
164
+ end
165
+
166
+ report = nil
167
+
168
+ begin
169
+ http = Net::HTTP.new(host)
170
+ http.open_timeout = 300
171
+ http.read_timeout = 600
172
+ result, = http.post(path, data.join('&'))
173
+ @output = result.body
174
+ # workaround 2005.08.12
175
+ if /\<A +HREF=\"(http\:\/\/blast\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
176
+ result, = http.get($2)
177
+ @output = result.body
178
+ txt = @output.to_s.split(/\<pre\>/)[1]
179
+ raise 'cannot understand response' unless txt
180
+ txt.sub!(/\<\/pre\>.*\z/m, '')
181
+ txt.sub!(/.*^ \-{20,}\s*/m, '')
182
+ @output = txt.gsub(/\&lt\;/, '<')
183
+ report = parse_result(@output)
184
+ else
185
+ raise 'cannot understand response'
186
+ end
187
+ end
188
+
189
+ return report
190
+ end
191
+
192
+
193
+ def exec_ncbi(query)
194
+ raise NotImplementedError
195
+ end
196
+ end
197
+
198
+ end
199
+
200
+
201
+ if __FILE__ == $0
202
+ begin
203
+ require 'pp'
204
+ alias p pp
205
+ rescue
206
+ end
207
+
208
+ # serv = Bio::Blast.local('blastn', 'hoge.nuc')
209
+ # serv = Bio::Blast.local('blastp', 'hoge.pep')
210
+ serv = Bio::Blast.remote('blastp', 'genes')
211
+
212
+ query = ARGF.read
213
+ p serv.query(query)
214
+ end
215
+
216
+
217
+ =begin
218
+
219
+ = Bio::Blast
220
+
221
+ --- Bio::Blast.new(program, db, opt = [], server = 'local')
222
+ --- Bio::Blast.local(program, db, option = '')
223
+ --- Bio::Blast.remote(program, db, option = '', server = 'genomenet')
224
+
225
+ Returns a blast factory object (Bio::Blast).
226
+
227
+ For developers: you can add a server 'hoge' by defining an
228
+ exec_hoge(query) method.
229
+
230
+ --- Bio::Blast#query(query)
231
+
232
+ Executes a BLAST search and returns a Report object (Bio::Blast::Report).
233
+
234
+ --- Bio::Blast#output
235
+
236
+ Returns a String containing the BLAST execution output in as-is format.
237
+
238
+ --- Bio::Blast#program
239
+ --- Bio::Blast#db
240
+ --- Bio::Blast#options
241
+ --- Bio::Blast#server
242
+ --- Bio::Blast#blastall
243
+ --- Bio::Blast#filter
244
+
245
+ Accessors for the factory parameters.
246
+
247
+ --- Bio::Blast#option
248
+ --- Bio::Blast#option=(str)
249
+
250
+ Get/set options by string.
251
+
252
+ == Available databases for Blast.remote(@program, @db, option, 'genomenet')
253
+
254
+ # ----------+-------+---------------------------------------------------
255
+ # @program | query | @db (supported in GenomeNet)
256
+ # ----------+-------+---------------------------------------------------
257
+ # blastp | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
258
+ # ----------+-------+ pir, prf, pdbstr
259
+ # blastx | NA |
260
+ # ----------+-------+---------------------------------------------------
261
+ # blastn | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
262
+ # ----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd,
263
+ # tblastn | AA | genes-nt, genome, vgenes.nuc
264
+ # ----------+-------+---------------------------------------------------
265
+
266
+ See http://blast.genome.jp/ideas/ideas.html#blast for more details.
267
+
268
+ =end
269
+