bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,350 @@
1
+ #
2
+ # = bio/appl/bl2seq/report.rb - bl2seq (BLAST 2 sequences) parser
3
+ #
4
+ # Copyright:: Copyright (C) 2005 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.6 2005/12/18 15:58:39 k Exp $
24
+ #
25
+ # Bio::Blast::Bl2seq::Report is an NCBI bl2seq (BLAST 2 sequences) output parser.
26
+ #
27
+ # = Acknowledgements
28
+ #
29
+ # Thanks to Tomoaki NISHIYAMA <tomoakin __at__ kenroku.kanazawa-u.ac.jp>
30
+ # for providing bl2seq parser patches based on
31
+ # lib/bio/appl/blast/format0.rb.
32
+ #
33
+
34
+ require 'bio/appl/blast/format0'
35
+
module Bio
  class Blast

    class Bl2seq

      # Bio::Blast::Bl2seq::Report parses the output of NCBI bl2seq
      # (BLAST 2 sequences).
      # It is a subclass of Bio::Blast::Default::Report and shares most of
      # its behaviour, but several inherited methods are removed (see the
      # +undef+ lists below).
      class Report < Bio::Blast::Default::Report

        # Entry delimiter (Bio::FlatFile uses it, per the original notes).
        # It is nil here: one file holds exactly one entry.
        DELIMITER = RS = nil

        # Inherited methods that are removed from this class.
        undef format0_parse_header
        undef program, version, version_number, version_date,
              message, converged?, reference, db

        # Takes the first element off +data+ and keeps it as the query
        # section (@f0query).
        def format0_split_headers(data)
          @f0query = data.shift
        end
        private :format0_split_headers

        # Builds the list of Iteration objects from +data+.
        # Iterations are created for as long as the first element of +data+
        # starts with ">"; if none was created, a single Iteration is
        # still built so the returned array is never empty.
        def format0_split_search(data)
          found = []
          while (head = data[0]) && /^\>/ =~ head
            found << Iteration.new(data)
          end
          found << Iteration.new(data) if found.empty?
          found
        end
        private :format0_split_search

        # Holds format0 database statistics.
        # For internal use only; not part of the public API.
        class F0dbstat < Bio::Blast::Default::Report::F0dbstat #:nodoc:
          # Number of sequences in the database (memoized).
          def db_num
            return @db_num if defined?(@db_num)
            parse_params
            @db_num = @hash['Number of Sequences'].to_i
          end

          # Number of letters in the database (memoized).
          def db_len
            return @db_len if defined?(@db_len)
            parse_params
            @db_len = @hash['length of database'].to_i
          end
        end #class F0dbstat

        # Bio::Blast::Bl2seq::Report::Iteration holds the data of one
        # iteration, normally a list of Bio::Blast::Bl2seq::Report::Hit
        # objects.
        #
        # It exists mainly to stay compatible with the
        # Bio::Blast::Default::Report::* classes.
        class Iteration < Bio::Blast::Default::Report::Iteration
          # Creates a new Iteration, consuming hit entries from +data+.
          # Intended to be called only from the report class itself;
          # do not call it directly.
          def initialize(data)
            @num      = 1
            @hits     = []
            @f0stat   = []
            @f0dbstat = Bio::Blast::Default::Report::AlwaysNil.instance
            # Each element starting with ">" begins a new hit.
            while (head = data[0]) && /^\>/ =~ head
              @hits << Hit.new(data)
            end
          end

          # The hits of this iteration, as an array of
          # Bio::Blast::Bl2seq::Report::Hit objects.
          attr_reader :hits

          # Inherited methods that are removed from this class.
          undef message, pattern_in_database,
                pattern, pattern_positions, hits_found_again,
                hits_newly_found, hits_for_pattern, parse_hitlist,
                converged?
        end #class Iteration

        # Bio::Blast::Bl2seq::Report::Hit holds information about one hit
        # and may contain Bio::Blast::Default::Report::HSP objects.
        # It adds nothing to Bio::Blast::Default::Report::Hit; see that
        # class for the available methods.
        class Hit < Bio::Blast::Default::Report::Hit
        end #class Hit

        # Bio::Blast::Bl2seq::Report::HSP describes a high-scoring
        # segment pair.
        # NOTE: this class is NOT actually used, because Ruby resolves
        # constants statically and HSP objects are created inside
        # Bio::Blast::Default::Report::Hit.
        # See Bio::Blast::Default::Report::HSP.
        class HSP < Bio::Blast::Default::Report::HSP
        end #class HSP

      end #class Report
    end #class Bl2seq

  end #class Blast
end #module Bio
148
+
149
+ ######################################################################
150
+
# Demo / smoke test, run only when this file is executed directly.
# Reads bl2seq output from ARGF and dumps the values returned by the
# report, iteration, hit and HSP accessors.
#
# Lines starting with "#@#" or "#" are probes that were already disabled
# in the original script; several of the "#@#" ones name methods that
# Bio::Blast::Bl2seq::Report or its Iteration class +undef+.
if __FILE__ == $0

  Bio::FlatFile.open(Bio::Blast::Bl2seq::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::Bl2seq::Report\n"
      puts
      #@#print " rep.program #=> "; p rep.program
      #@#print " rep.version #=> "; p rep.version
      #@#print " rep.reference #=> "; p rep.reference
      #@#print " rep.db #=> "; p rep.db
      #print " rep.query_id #=> "; p rep.query_id
      print " rep.query_def #=> "; p rep.query_def
      print " rep.query_len #=> "; p rep.query_len
      #puts
      #@#print " rep.version_number #=> "; p rep.version_number
      #@#print " rep.version_date #=> "; p rep.version_date
      puts

      print "# === Parameters\n"
      #puts
      #print " rep.parameters #=> "; p rep.parameters
      puts
      print " rep.matrix #=> "; p rep.matrix
      print " rep.expect #=> "; p rep.expect
      #print " rep.inclusion #=> "; p rep.inclusion
      print " rep.sc_match #=> "; p rep.sc_match
      print " rep.sc_mismatch #=> "; p rep.sc_mismatch
      print " rep.gap_open #=> "; p rep.gap_open
      print " rep.gap_extend #=> "; p rep.gap_extend
      #print " rep.filter #=> "; p rep.filter
      #@#print " rep.pattern #=> "; p rep.pattern
      #print " rep.entrez_query #=> "; p rep.entrez_query
      #puts
      #@#print " rep.pattern_positions #=> "; p rep.pattern_positions
      puts

      print "# === Statistics (last iteration's)\n"
      #puts
      #print " rep.statistics #=> "; p rep.statistics
      puts
      print " rep.db_num #=> "; p rep.db_num
      print " rep.db_len #=> "; p rep.db_len
      #print " rep.hsp_len #=> "; p rep.hsp_len
      print " rep.eff_space #=> "; p rep.eff_space
      print " rep.kappa #=> "; p rep.kappa
      print " rep.lambda #=> "; p rep.lambda
      print " rep.entropy #=> "; p rep.entropy
      puts
      print " rep.num_hits #=> "; p rep.num_hits
      print " rep.gapped_kappa #=> "; p rep.gapped_kappa
      print " rep.gapped_lambda #=> "; p rep.gapped_lambda
      print " rep.gapped_entropy #=> "; p rep.gapped_entropy
      print " rep.posted_date #=> "; p rep.posted_date
      puts

      #@#print "# === Message (last iteration's)\n"
      #@#puts
      #@#print " rep.message #=> "; p rep.message
      #puts
      #@#print " rep.converged? #=> "; p rep.converged?
      #@#puts

      print "# === Iterations\n"
      puts
      # Label fixed: the method is "iterations" (was misspelled "itrerations").
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::Bl2seq::Report::Iteration\n"
        puts

        print " itr.num #=> "; p itr.num
        #print " itr.statistics #=> "; p itr.statistics
        #@#print " itr.message #=> "; p itr.message
        print " itr.hits.size #=> "; p itr.hits.size
        #puts
        #@#print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size;
        #@#print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size;
        #@#if itr.hits_for_pattern then
        #@#itr.hits_for_pattern.each_with_index do |hp, hpi|
        #@#print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size;
        #@#end
        #@#end
        #@#print " itr.converged? #=> "; p itr.converged?
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          # Label fixed: there is no Bio::Blast::Bl2seq::Default namespace;
          # the hit class defined by this library is
          # Bio::Blast::Bl2seq::Report::Hit.
          print "# --- Bio::Blast::Bl2seq::Report::Hit"
          print " ([#{i}])\n"
          puts

          #print " hit.num #=> "; p hit.num
          #print " hit.hit_id #=> "; p hit.hit_id
          print " hit.len #=> "; p hit.len
          print " hit.definition #=> "; p hit.definition
          #print " hit.accession #=> "; p hit.accession
          #puts
          print " hit.found_again? #=> "; p hit.found_again?

          print " --- compatible/shortcut ---\n"
          #print " hit.query_id #=> "; p hit.query_id
          #print " hit.query_def #=> "; p hit.query_def
          #print " hit.query_len #=> "; p hit.query_len
          #print " hit.target_id #=> "; p hit.target_id
          print " hit.target_def #=> "; p hit.target_def
          print " hit.target_len #=> "; p hit.target_len

          print " --- first HSP's values (shortcut) ---\n"
          print " hit.evalue #=> "; p hit.evalue
          print " hit.bit_score #=> "; p hit.bit_score
          print " hit.identity #=> "; p hit.identity
          #print " hit.overlap #=> "; p hit.overlap

          print " hit.query_seq #=> "; p hit.query_seq
          print " hit.midline #=> "; p hit.midline
          print " hit.target_seq #=> "; p hit.target_seq

          print " hit.query_start #=> "; p hit.query_start
          print " hit.query_end #=> "; p hit.query_end
          print " hit.target_start #=> "; p hit.target_start
          print " hit.target_end #=> "; p hit.target_end
          print " hit.lap_at #=> "; p hit.lap_at
          # Closing markers for the two sections opened above
          # (label fixed: "values" was misspelled "vaules").
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0 then
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              print "# --- Bio::Blast::Default::Report::HSP (Bio::Blast::Bl2seq::Report::HSP)"
              print " ([#{j}])\n"
              puts
              #print " hsp.num #=> "; p hsp.num
              print " hsp.bit_score #=> "; p hsp.bit_score
              print " hsp.score #=> "; p hsp.score
              print " hsp.evalue #=> "; p hsp.evalue
              print " hsp.identity #=> "; p hsp.identity
              print " hsp.gaps #=> "; p hsp.gaps
              print " hsp.positive #=> "; p hsp.positive
              print " hsp.align_len #=> "; p hsp.align_len
              #print " hsp.density #=> "; p hsp.density

              print " hsp.query_frame #=> "; p hsp.query_frame
              print " hsp.query_from #=> "; p hsp.query_from
              print " hsp.query_to #=> "; p hsp.query_to

              print " hsp.hit_frame #=> "; p hsp.hit_frame
              print " hsp.hit_from #=> "; p hsp.hit_from
              print " hsp.hit_to #=> "; p hsp.hit_to

              #print " hsp.pattern_from#=> "; p hsp.pattern_from
              #print " hsp.pattern_to #=> "; p hsp.pattern_to

              print " hsp.qseq #=> "; p hsp.qseq
              print " hsp.midline #=> "; p hsp.midline
              print " hsp.hseq #=> "; p hsp.hseq
              puts
              print " hsp.percent_identity #=> "; p hsp.percent_identity
              #print " hsp.mismatch_count #=> "; p hsp.mismatch_count
              #
              print " hsp.query_strand #=> "; p hsp.query_strand
              print " hsp.hit_strand #=> "; p hsp.hit_strand
              print " hsp.percent_positive #=> "; p hsp.percent_positive
              print " hsp.percent_gaps #=> "; p hsp.percent_gaps
              puts

            end #each
          end #if hit.hsps.size == 0
        end
      end
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
340
+
341
+ ######################################################################
342
+
343
+ =begin
344
+
345
+ = Bio::Blast::Bl2seq::Report
346
+
347
+ NCBI bl2seq (BLAST 2 sequences) output parser
348
+
349
+ =end
350
+
@@ -0,0 +1,269 @@
1
+ #
2
+ # bio/appl/blast.rb - BLAST wrapper
3
+ #
4
+ # Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright (C) 2002,2003 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: blast.rb,v 1.27 2005/12/18 17:28:55 nakao Exp $
22
+ #
23
+
24
+ require 'net/http'
25
+ require 'cgi' unless defined?(CGI)
26
+ require 'bio/command'
27
+ require 'shellwords'
28
+
module Bio

  # Bio::Blast is a factory that runs a BLAST search, either with a local
  # +blastall+ command or through the GenomeNet web service, and hands the
  # output to Bio::Blast::Report.
  #
  # A new back end named "hoge" can be supported by adding a private
  # <tt>exec_hoge(query)</tt> method; #query dispatches on the +server+ name.
  class Blast

    autoload :Fastacmd, 'bio/io/fastacmd'
    autoload :Report,   'bio/appl/blast/report'
    autoload :Default,  'bio/appl/blast/format0'
    autoload :WU,       'bio/appl/blast/wublast'
    autoload :Bl2seq,   'bio/appl/bl2seq/report'

    include Bio::Command::Tools

    # Creates a BLAST factory.
    #
    # +program+:: BLAST program name (e.g. 'blastp').
    # +db+::      database name.
    # +opt+::     extra command line options, either as an Array of words
    #             or (for backward compatibility) as a single String that
    #             is split with Shellwords.
    # +server+::  back end name; #query calls <tt>exec_<server></tt>.
    #
    # Unless the options already contain a word starting with "-m", the
    # output format defaults to 7 when XMLParser or REXML is loaded and to
    # 8 otherwise. When "-m" is given, +format+ stays nil and the user's
    # option is passed through unchanged.
    def initialize(program, db, opt = [], server = 'local')
      @program  = program
      @db       = db
      @server   = server

      @blastall = 'blastall'
      @matrix   = nil
      @filter   = nil

      @output   = ''
      @parser   = nil
      # Assigned explicitly so that #format never reads an unset variable.
      @format   = nil

      # Array-like options are used as they are; anything else is treated
      # as a command line string (backward compatibility). Checking with
      # respond_to? avoids rescuing unrelated NameErrors.
      words = opt.respond_to?(:to_ary) ? opt.to_ary : Shellwords.shellwords(opt)

      unless words.any? { |word| /\A\-m/ =~ word.to_s }
        @format = (defined?(XMLParser) or defined?(REXML)) ? 7 : 8
      end
      @options = [ *words ]
    end
    attr_accessor :program, :db, :options, :server, :blastall, :matrix, :filter
    attr_reader :output, :format
    attr_writer :parser # to change :xmlparser, :rexml, :tab

    # Shortcut for a factory that uses the 'local' back end.
    def self.local(program, db, option = '')
      self.new(program, db, option, 'local')
    end

    # Shortcut for a factory that uses a remote back end
    # ('genomenet' by default).
    def self.remote(program, db, option = '', server = 'genomenet')
      self.new(program, db, option, server)
    end

    # Runs the search for +query+ (converted with to_s) on the configured
    # back end by calling the private method <tt>exec_<server></tt>, and
    # returns whatever that method returns (a report object for the
    # back ends defined in this class).
    def query(query)
      return self.send("exec_#{@server}", query.to_s)
    end

    # Returns the options as one command line string
    # (backward compatibility). The string is built by make_command_line,
    # which is not defined in this class -- presumably it comes from
    # Bio::Command::Tools.
    def option
      make_command_line(@options)
    end

    # Sets the options from a command line string
    # (backward compatibility); the string is split with Shellwords.
    def option=(str)
      @options = Shellwords.shellwords(str)
    end

    # Splits +input+ on "</BlastOutput>\n" and builds a Report for each
    # chunk. Only meant for XML formatted output.
    # (Moved here from bio/appl/blast/report.rb.)
    #
    # With a block, each Report is yielded and the returned array is
    # empty; without a block, the array of Reports is returned.
    def self.reports(input, parser = nil)
      ary = []
      input.each("</BlastOutput>\n") do |xml|
        xml.sub!(/[^<]*(<?)/, '\1') # skip before <?xml> tag
        next if xml.empty? # skip trailing no hits
        if block_given?
          yield Report.new(xml, parser)
        else
          ary << Report.new(xml, parser)
        end
      end
      return ary
    end


    private


    # Wraps raw BLAST output in a Report, using the configured parser.
    def parse_result(data)
      Report.new(data, @parser)
    end


    # Runs the local blastall command with +query+ as input, stores the
    # raw output in @output and returns the parsed report.
    # The command is executed through call_command_local, which is not
    # defined in this class -- presumably it comes from Bio::Command::Tools.
    def exec_local(query)
      cmd = [ @blastall, '-p', @program, '-d', @db ]
      cmd.concat([ '-M', @matrix ]) if @matrix
      cmd.concat([ '-F', @filter ]) if @filter
      cmd.concat([ '-m', @format.to_s ]) if @format
      cmd.concat(@options) if @options

      @output = call_command_local(cmd, query)
      parse_result(@output)
    end


    # Runs the search on the GenomeNet BLAST service (blast.genome.jp).
    #
    # The form is POSTed to the CGI, the "Show all result" link found in
    # the answer is fetched, and the text inside its <pre> element is
    # cleaned up and parsed. The cleaned text is kept in @output.
    #
    # Raises RuntimeError ('cannot understand response') when the answer
    # does not have the expected shape.
    def exec_genomenet(query)
      host = "blast.genome.jp"
      #path = "/sit-bin/nph-blast"
      path = "/sit-bin/blast" #2005.08.12

      matrix = @matrix ? @matrix : 'blosum62'
      filter = @filter ? @filter : 'T'

      opt = []
      opt.concat([ '-m', @format.to_s ]) if @format
      opt.concat(@options) if @options

      # Raw (unescaped) form values; every value is URL-escaped exactly
      # once when the request body is assembled below. Previously only
      # 'sequence' and 'other_param' were escaped, so a program, database,
      # matrix or filter value with reserved characters corrupted the body.
      form = {
        'style'          => 'raw',
        'prog'           => @program,
        'dbname'         => @db,
        'sequence'       => query,
        'other_param'    => make_command_line_unix(opt),
        'matrix'         => matrix,
        'filter'         => filter,
        'V_value'        => 500, # default value for GenomeNet
        'B_value'        => 250, # default value for GenomeNet
        'alignment_view' => 0,
      }

      data = []
      form.each do |k, v|
        data.push("#{k}=#{CGI.escape(v.to_s)}") if v
      end

      http = Net::HTTP.new(host)
      http.open_timeout = 300
      http.read_timeout = 600
      # "result, =" keeps only the first element if an Array is returned.
      result, = http.post(path, data.join('&'))
      @output = result.body

      # workaround 2005.08.12: the real result is behind a link
      if /\<A +HREF=\"(http\:\/\/blast\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
        result, = http.get($2)
        @output = result.body
        txt = @output.to_s.split(/\<pre\>/)[1]
        raise 'cannot understand response' unless txt
        txt.sub!(/\<\/pre\>.*\z/m, '')   # drop everything after </pre>
        txt.sub!(/.*^ \-{20,}\s*/m, '')  # drop everything up to the dashed line
        @output = txt.gsub(/\&lt\;/, '<')
        parse_result(@output)
      else
        raise 'cannot understand response'
      end
    end


    # Placeholder for an NCBI back end; always raises NotImplementedError.
    def exec_ncbi(query)
      raise NotImplementedError
    end
  end

end
199
+
200
+
# Demo, run only when this file is executed directly: reads a query from
# ARGF, searches the GenomeNet 'genes' database with blastp and prints the
# resulting report.
if __FILE__ == $0
  # Pretty-print when pp is available. A failing require raises LoadError,
  # which a bare rescue (StandardError only) does not catch, so it is
  # listed explicitly; StandardError is kept to preserve the old behaviour.
  begin
    require 'pp'
    alias p pp
  rescue LoadError, StandardError
    # fall back to the plain Kernel#p
  end

  # serv = Bio::Blast.local('blastn', 'hoge.nuc')
  # serv = Bio::Blast.local('blastp', 'hoge.pep')
  serv = Bio::Blast.remote('blastp', 'genes')

  query = ARGF.read
  p serv.query(query)
end
215
+
216
+
217
+ =begin
218
+
219
+ = Bio::Blast
220
+
221
+ --- Bio::Blast.new(program, db, opt = [], server = 'local')
222
+ --- Bio::Blast.local(program, db, option = '')
223
+ --- Bio::Blast.remote(program, db, option = '', server = 'genomenet')
224
+
225
+ Returns a blast factory object (Bio::Blast).
226
+
227
+ For developers: you can add a server 'hoge' by adding an
228
+ exec_hoge(query) method.
229
+
230
+ --- Bio::Blast#query(query)
231
+
232
+ Executes a blast search and returns a Report object (Bio::Blast::Report).
233
+
234
+ --- Bio::Blast#output
235
+
236
+ Returns a String containing the blast execution output as is (unparsed).
237
+
238
+ --- Bio::Blast#program
239
+ --- Bio::Blast#db
240
+ --- Bio::Blast#options
241
+ --- Bio::Blast#server
242
+ --- Bio::Blast#blastall
243
+ --- Bio::Blast#filter
244
+
245
+ Accessors for the factory parameters.
246
+
247
+ --- Bio::Blast#option
248
+ --- Bio::Blast#option=(str)
249
+
250
+ Get/set options by string.
251
+
252
+ == Available databases for Blast.remote(@program, @db, option, 'genomenet')
253
+
254
+ # ----------+-------+---------------------------------------------------
255
+ # @program | query | @db (supported in GenomeNet)
256
+ # ----------+-------+---------------------------------------------------
257
+ # blastp | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
258
+ # ----------+-------+ pir, prf, pdbstr
259
+ # blastx | NA |
260
+ # ----------+-------+---------------------------------------------------
261
+ # blastn | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
262
+ # ----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd,
263
+ # tblastn | AA | genes-nt, genome, vgenes.nuc
264
+ # ----------+-------+---------------------------------------------------
265
+
266
+ See http://blast.genome.jp/ideas/ideas.html#blast for more details.
267
+
268
+ =end
269
+