bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,222 @@
1
+ #
2
+ # bio/appl/blast/xmlparser.rb - BLAST XML output (-m 7) parser by XMLParser
3
+ #
4
+ # Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: xmlparser.rb,v 1.13 2005/09/08 01:22:08 k Exp $
22
+ #
23
+
24
+ begin
25
+ require 'xmlparser'
26
+ rescue LoadError
27
+ end
28
+
29
module Bio
  class Blast
    class Report

      private

      # Parses BLAST XML output (-m 7) given as +xml+ by walking the event
      # stream produced by XMLParser and filling @iterations and the
      # report-level instance variables.
      #
      # Character data is buffered per tag name in a Hash; whenever a
      # recognised element is closed, the buffer is handed to the matching
      # xmlparser_parse_* helper and then replaced by an empty one.
      #
      # An XMLParserError is not propagated: the position and the message
      # are printed to standard output instead.
      def xmlparser_parse(xml)
        parser = XMLParser.new
        def parser.default; end

        begin
          open_tags = []
          fields = {}

          parser.parse(xml) do |event, name, data|
            #print "type=#{event.inspect} name=#{name.inspect} data=#{data.inspect}\n" # for DEBUG
            case event
            when XMLParser::START_ELEM
              open_tags.push(name)
              # for a start tag, data holds the attributes of the element
              fields.update(data)
              case name
              when 'Iteration'
                @iterations.push(Iteration.new)
              when 'Hit'
                new_hit = Hit.new
                new_hit.query_id = @query_id
                new_hit.query_def = @query_def
                new_hit.query_len = @query_len
                @iterations.last.hits.push(new_hit)
              when 'Hsp'
                @iterations.last.hits.last.hsps.push(Hsp.new)
              end
            when XMLParser::END_ELEM
              # The order of the patterns matters: the first match wins.
              consumed = true
              case name
              when /^BlastOutput/ then xmlparser_parse_program(name, fields)
              when /^Parameters$/ then xmlparser_parse_parameters(fields)
              when /^Iteration/   then xmlparser_parse_iteration(name, fields)
              when /^Hit/         then xmlparser_parse_hit(name, fields)
              when /^Hsp$/        then xmlparser_parse_hsp(fields)
              when /^Statistics$/ then xmlparser_parse_statistics(fields)
              else consumed = false
              end
              # start with a fresh buffer once a handler has seen the data
              fields = {} if consumed
              open_tags.pop
            when XMLParser::CDATA
              current_tag = open_tags.last
              if fields[current_tag].nil?
                # whitespace-only text never starts a new entry
                fields[current_tag] = data unless data.strip.empty?
              else
                # text of one element may arrive in several pieces
                fields[current_tag].concat(data) if data
              end
            when XMLParser::PI
              # processing instructions are ignored
            end
          end
        rescue XMLParserError
          line = parser.line
          column = parser.column
          print "Parse error at #{line}(#{column}) : #{$!}\n"
        end
      end


      # Stores the value of a closed BlastOutput_* element (+tag+) taken
      # from +hash+ into the corresponding instance variable.
      # The database name is stripped and the query length is converted
      # to an Integer; other tags are ignored.
      def xmlparser_parse_program(tag, hash)
        if tag == 'BlastOutput_program'
          @program = hash[tag]
        elsif tag == 'BlastOutput_version'
          @version = hash[tag]
        elsif tag == 'BlastOutput_reference'
          @reference = hash[tag]
        elsif tag == 'BlastOutput_db'
          @db = hash[tag].strip
        elsif tag == 'BlastOutput_query-ID'
          @query_id = hash[tag]
        elsif tag == 'BlastOutput_query-def'
          @query_def = hash[tag]
        elsif tag == 'BlastOutput_query-len'
          @query_len = hash[tag].to_i
        end
      end

      # Copies the Parameters_* values found in +hash+ into @parameters
      # under their short names. 'matrix' and 'filter' are kept as Strings,
      # every other value is converted with to_i (missing values become
      # "" and 0 respectively).
      #
      # NOTE(review): 'expect' and 'include' look like they could be
      # fractional values, which to_i would truncate -- confirm.
      def xmlparser_parse_parameters(hash)
        labels = {
          'matrix'      => 'Parameters_matrix',
          'expect'      => 'Parameters_expect',
          'include'     => 'Parameters_include',
          'sc-match'    => 'Parameters_sc-match',
          'sc-mismatch' => 'Parameters_sc-mismatch',
          'gap-open'    => 'Parameters_gap-open',
          'gap-extend'  => 'Parameters_gap-extend',
          'filter'      => 'Parameters_filter',
          'pattern'     => 'Parameters_pattern',
          'entrez-query'=> 'Parameters_entrez-query',
        }
        textual = ['filter', 'matrix']
        labels.each do |short_name, tag|
          raw = hash[tag]
          @parameters[short_name] = textual.include?(short_name) ? raw.to_s : raw.to_i
        end
      end

      # Stores the iteration number or the iteration message of a closed
      # Iteration_* element (+tag+) into the most recent iteration.
      # Other tags are ignored.
      def xmlparser_parse_iteration(tag, hash)
        if tag == 'Iteration_iter-num'
          @iterations.last.num = hash[tag].to_i
        elsif tag == 'Iteration_message'
          @iterations.last.message = hash[tag].to_s
        end
      end

      # Stores the value of a closed Hit_* element (+tag+) into the most
      # recent hit of the most recent iteration. Other tags are ignored.
      def xmlparser_parse_hit(tag, hash)
        current_hit = @iterations.last.hits.last
        if tag == 'Hit_num'
          current_hit.num = hash[tag].to_i
        elsif tag == 'Hit_id'
          current_hit.hit_id = hash[tag].clone
        elsif tag == 'Hit_def'
          current_hit.definition = hash[tag].clone
        elsif tag == 'Hit_accession'
          current_hit.accession = hash[tag].clone
        elsif tag == 'Hit_len'
          current_hit.len = hash[tag].clone.to_i
        end
      end

      # Fills the most recent Hsp of the most recent hit from the Hsp_*
      # values collected in +hash+. Numeric values are converted with
      # to_i / to_f; the sequences and the midline are stored as they are.
      def xmlparser_parse_hsp(hash)
        target = @iterations.last.hits.last.hsps.last

        # [setter, tag] pairs whose values are stored as Integers
        integer_fields = [
          [:num=,          'Hsp_num'],
          [:score=,        'Hsp_score'],
          [:query_from=,   'Hsp_query-from'],
          [:query_to=,     'Hsp_query-to'],
          [:hit_from=,     'Hsp_hit-from'],
          [:hit_to=,       'Hsp_hit-to'],
          [:pattern_from=, 'Hsp_pattern-from'],
          [:pattern_to=,   'Hsp_pattern-to'],
          [:query_frame=,  'Hsp_query-frame'],
          [:hit_frame=,    'Hsp_hit-frame'],
          [:identity=,     'Hsp_identity'],
          [:positive=,     'Hsp_positive'],
          [:gaps=,         'Hsp_gaps'],
          [:align_len=,    'Hsp_align-len'],
          [:density=,      'Hsp_density'],
        ]
        integer_fields.each do |setter, tag|
          target.__send__(setter, hash[tag].to_i)
        end

        target.bit_score = hash['Hsp_bit-score'].to_f
        target.evalue    = hash['Hsp_evalue'].to_f

        target.qseq    = hash['Hsp_qseq']
        target.hseq    = hash['Hsp_hseq']
        target.midline = hash['Hsp_midline']
      end

      # Copies the Statistics_* values found in +hash+ into the statistics
      # of the most recent iteration. 'db-num', 'db-len' and 'hsp-len' are
      # stored as Integers, the remaining values as Floats.
      def xmlparser_parse_statistics(hash)
        labels = {
          'db-num'    => 'Statistics_db-num',
          'db-len'    => 'Statistics_db-len',
          'hsp-len'   => 'Statistics_hsp-len',
          'eff-space' => 'Statistics_eff-space',
          'kappa'     => 'Statistics_kappa',
          'lambda'    => 'Statistics_lambda',
          'entropy'   => 'Statistics_entropy'
        }
        counters = ['db-num', 'db-len', 'hsp-len']
        labels.each do |short_name, tag|
          raw = hash[tag]
          @iterations.last.statistics[short_name] =
            counters.include?(short_name) ? raw.to_i : raw.to_f
        end
      end

    end
  end
end
216
+
217
+
218
+ =begin
219
+
220
+ This file is automatically loaded by bio/appl/blast/report.rb
221
+
222
+ =end
@@ -0,0 +1,392 @@
1
+ #
2
+ # = bio/appl/blat/report.rb - BLAT result parser
3
+ #
4
+ # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.6 2005/12/18 15:58:39 k Exp $
24
+ #
25
+ # BLAT result parser (psl / pslx format).
26
+ #
27
+ # == Important Notes
28
+ #
29
+ # In BLAT results, the start position of a sequence is numbered as 0.
30
+ # On the other hand, in many other homology search programs,
31
+ # the start position of a sequence is numbered as 1.
32
+ # To keep compatibility, the BLAT parser adds 1 to every position number.
33
+ #
34
+ # == References
35
+ #
36
+ # * Kent, W.J., BLAT--the BLAST-like alignment tool,
37
+ # Genome Research, 12, 656--664, 2002.
38
+ # http://www.genome.org/cgi/content/abstract/12/4/656
39
+ #
40
+
41
+ require 'bio'
42
+
43
module Bio
  class Blat

    # Bio::Blat::Report is a BLAT report parser class.
    # Its object may contain some Bio::Blat::Report::Hit objects.
    #
    # In BLAT results, the start position of a sequence is numbered as 0.
    # On the other hand, in many other homology search programs,
    # the start position of a sequence is numbered as 1.
    # To keep compatibility, the BLAT parser adds 1 to every position number.
    #
    # Note that Bio::Blat::Report#query_def, #query_id, #query_len methods
    # simply return first hit's query_*.
    # If multiple query sequences are given, these values
    # will be incorrect.
    #
    class Report #< DB
      # Delimiter of each entry. Bio::FlatFile uses it.
      # In Bio::Blat::Report, it is nil (1 entry 1 file).
      DELIMITER = RS = nil # 1 file 1 entry

      # Creates a new Bio::Blat::Report object from BLAT result text (String).
      # You can use Bio::FlatFile to read a file.
      # Currently, results created with options -out=psl (default) or
      # -out=pslx are supported.
      #
      # Every line before the first line consisting only of dashes is
      # treated as header; every non-blank line after it becomes a Hit.
      def initialize(text)
        in_body = false
        header  = []
        @hits   = []
        # String#each no longer exists since Ruby 1.9, so iterate with
        # each_line when available; objects that only provide each
        # (e.g. an Array of lines) keep working as before.
        line_iterator = text.respond_to?(:each_line) ? :each_line : :each
        text.__send__(line_iterator) do |line|
          if in_body
            # a blank line carries no hit data
            @hits << Hit.new(line) unless line.strip.empty?
          else
            line = line.chomp
            if /\A\-+\s*\z/ =~ line
              in_body = true
            else
              header << line
            end
          end
        end
        @columns = parse_header(header)
      end

      # hits of the result.
      # Returns an Array of Bio::Blat::Report::Hit objects.
      attr_reader :hits

      # Returns descriptions of columns.
      # Returns an Array.
      # This would be a Bio::Blat specific method.
      attr_reader :columns

      # Parses headers.
      # +ary+ is an Array of header lines; its first line is discarded.
      # The remaining lines are split by tab and joined column by column:
      # a cell ending with '-' is glued to the cell below it without a
      # space ("mis-" + "match" => "mismatch"), otherwise a space is put
      # between the cells.
      def parse_header(ary)
        ary.shift # first line is removed
        rows = ary.map { |row| row.split(/\t/) }
        names = []
        rows.each do |cells|
          cells.each_index do |i|
            cell = cells[i].strip
            names[i] = names[i].to_s + (cell.sub!(/\-\z/, '') ? cell : cell + ' ')
          end
        end
        names.each { |name| name.strip! }
        names
      end
      private :parse_header

      # Bio::Blat::Report::SeqDesc stores sequence information of
      # query or subject of the BLAT report.
      # It also includes some hit information.
      class SeqDesc
        # Creates a new SeqDesc object.
        # It is designed to be called internally from Bio::Blat::Report class.
        # Users shall not use it directly.
        #
        # Numeric arguments may be given as Strings; they are converted
        # with to_i. +starts+ must be an Array.
        def initialize(gap_count, gap_bases, name, size,
                       st, ed, starts, seqs)
          @gap_count = gap_count.to_i
          @gap_bases = gap_bases.to_i
          @name      = name
          @size      = size.to_i
          @start     = st.to_i
          @end       = ed.to_i
          @starts    = starts.map { |x| x.to_i }
          @seqs      = seqs
        end
        # gap count
        attr_reader :gap_count
        # gap bases
        attr_reader :gap_bases
        # name of the sequence
        attr_reader :name
        # length of the sequence
        attr_reader :size
        # start position of the first segment
        attr_reader :start
        # end position of the final segment
        attr_reader :end
        # start positions of segments.
        # Returns an array of numbers.
        attr_reader :starts
        # sequences of segments.
        # Returns the value given to the constructor
        # (Bio::Blat::Report::Hit passes an Array of String, which is
        # empty when the result contains no sequence data).
        attr_reader :seqs
      end #class SeqDesc

      # Sequence segment pair of BLAT result.
      # Similar to Bio::Blast::Report::Hsp but lacks many methods.
      class SegmentPair
        # Creates a new SegmentPair object.
        # It is designed to be called internally from Bio::Blat::Report class.
        # Users shall not use it directly.
        #
        # +qstart+ and +tstart+ are the 0-based positions found in the
        # raw result; +qseq+ and +tseq+ are the sequences of the query
        # and of the target (nil when not available).
        def initialize(query_len, strand,
                       blksize, qstart, tstart, qseq, tseq)
          @blocksize  = blksize
          @qseq       = qseq
          # the target sequence is what #hseq reports
          @hseq       = tseq
          @hit_strand = 'plus'
          case strand
          when '-'
            # query is minus strand
            @query_strand = 'minus'
            # convert positions
            @query_from = query_len - qstart
            @query_to   = query_len - qstart - blksize + 1
            # To keep compatibility with other homology search programs,
            # we add 1 to each position number.
            @hit_from = tstart + 1
            @hit_to   = tstart + blksize # - 1 + 1
          else #when '+'
            @query_strand = 'plus'
            # To keep compatibility with other homology search programs,
            # we add 1 to each position number.
            @query_from = qstart + 1
            @query_to   = qstart + blksize # - 1 + 1
            @hit_from   = tstart + 1
            @hit_to     = tstart + blksize # - 1 + 1
          end
        end
        # Returns query start position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :query_from

        # Returns query end position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :query_to

        # Returns query sequence.
        # If sequence data is not available, returns nil.
        attr_reader :qseq

        # Returns strand information of the query.
        # Returns 'plus' or 'minus'.
        attr_reader :query_strand

        # Returns target (subject, hit) start position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :hit_from

        # Returns target (subject, hit) end position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :hit_to

        # Returns the target (subject, hit) sequence.
        # If sequence data is not available, returns nil.
        attr_reader :hseq

        # Returns strand information of the target (subject, hit).
        # Always 'plus' in this implementation.
        attr_reader :hit_strand

        # Returns block size (length) of the segment pair.
        # This would be a Bio::Blat specific method.
        attr_reader :blocksize

        # Returns alignment length of the segment pair.
        # Returns nil if no alignment data are available.
        def align_len
          @qseq ? @qseq.size : nil
        end
      end #class SegmentPair

      # Hit class for the BLAT result parser.
      # Similar to Bio::Blast::Report::Hit but lacks many methods.
      # Its object may contain some Bio::Blat::Report::SegmentPair objects.
      class Hit
        # Creates a new Hit object from a piece of BLAT result text
        # (one tab-separated line).
        # It is designed to be called internally from Bio::Blat::Report object.
        # Users shall not use it directly.
        def initialize(str)
          @data = str.chomp.split(/\t/)
        end

        # Raw data of the hit.
        # (Note that it doesn't add 1 to position numbers.)
        attr_reader :data

        # Splits comma-separated text into an Array of Strings.
        # Trailing commas are ignored; nil gives an empty Array.
        def split_comma(str)
          str.to_s.sub(/\s*\,+\s*\z/, '').split(/\s*\,\s*/)
        end
        private :split_comma

        # Returns sequence information of the query.
        # Returns a Bio::Blat::Report::SeqDesc object.
        # This would be Bio::Blat specific method.
        def query
          unless defined?(@query)
            d = @data
            @query = SeqDesc.new(d[4], d[5], d[9], d[10], d[11], d[12],
                                 split_comma(d[19]), split_comma(d[21]))
          end
          @query
        end

        # Returns sequence information of the target(hit).
        # Returns a Bio::Blat::Report::SeqDesc object.
        # This would be Bio::Blat specific method.
        def target
          unless defined?(@target)
            d = @data
            @target = SeqDesc.new(d[6], d[7], d[13], d[14], d[15], d[16],
                                  split_comma(d[20]), split_comma(d[22]))
          end
          @target
        end

        # Match nucleotides.
        def match;     @data[0].to_i; end
        # Mismatch nucleotides.
        def mismatch;  @data[1].to_i; end
        # Matching bases that are part of repeats
        # ("rep. match" column of the psl format).
        def rep_match; @data[2].to_i; end
        # Number of 'N' bases ("N's" column of the psl format).
        def n_s;       @data[3].to_i; end

        # Returns strand information of the hit.
        # Returns '+' or '-'.
        # This would be a Bio::Blat specific method.
        def strand;    @data[8]; end

        # Number of blocks(exons, segment pairs).
        def block_count; @data[17].to_i; end

        # Sizes of all blocks(exons, segment pairs).
        # Returns an array of numbers.
        def block_sizes
          unless defined?(@block_sizes) then
            @block_sizes = split_comma(@data[18]).map { |x| x.to_i }
          end
          @block_sizes
        end

        # Returns blocks(exons, segment pairs) of the hit.
        # Returns an array of Bio::Blat::Report::SegmentPair objects.
        def blocks
          unless defined?(@blocks)
            bs    = block_sizes
            qst   = query.starts
            tst   = target.starts
            qseqs = query.seqs
            tseqs = target.seqs
            @blocks = (0...block_count).map do |i|
              SegmentPair.new(query.size, strand, bs[i],
                              qst[i], tst[i], qseqs[i], tseqs[i])
            end
          end
          @blocks
        end
        alias exons blocks

        #--
        # Bio::BLAST::*::Report::Hit compatible methods
        #++
        alias hsps blocks

        # Returns the length of query sequence.
        def query_len;  query.size;  end

        # Returns the name of query sequence.
        def query_def;  query.name;  end
        alias query_id query_def

        # Returns the length of the target(subject) sequence.
        def target_len; target.size; end
        alias len target_len

        # Returns the name of the target(subject) sequence.
        def target_def; target.name; end
        alias target_id target_def
        alias definition target_def

        # Iterates over each block(exon, segment pair) of the hit.
        # Yields a Bio::Blat::Report::SegmentPair object.
        def each(&x) #:yields: segmentpair
          exons.each(&x)
        end
      end #class Hit

      #--
      # Bio::BLAST::*::Report compatible methods
      #++

      # Returns number of hits.
      # Same as hits.size.
      def num_hits;     @hits.size;     end

      # Iterates over each Bio::Blat::Report::Hit object.
      # Same as hits.each.
      def each_hit(&x) #:yields: hit
        @hits.each(&x)
      end
      alias each each_hit

      # Returns the name of query sequence.
      # CAUTION: query_* methods simply return first hit's query_*.
      # If multiple query sequences are given, these values
      # will be incorrect.
      def query_def; (x = @hits.first) ? x.query_def : nil; end

      # Returns the length of query sequence.
      # CAUTION: query_* methods simply return first hit's query_*.
      # If multiple query sequences are given, these values
      # will be incorrect.
      def query_len; (x = @hits.first) ? x.query_len : nil; end
      alias query_id query_def
    end #class Report

  end #class Blat
end #module Bio
379
+
380
+ =begin
381
+
382
+ = Bio::Blat::Report
383
+
384
+ BLAT result parser. (psl / pslx format)
385
+
386
+ = References
387
+
388
+ * ((<URL:http://www.genome.org/cgi/content/abstract/12/4/656>))
389
+ Kent, W.J., BLAT--the BLAST-like alignment tool,
390
+ Genome Research, 12, 656--664, 2002.
391
+
392
+ =end