bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,222 @@
1
+ #
2
+ # bio/appl/blast/xmlparser.rb - BLAST XML output (-m 7) parser by XMLParser
3
+ #
4
+ # Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: xmlparser.rb,v 1.13 2005/09/08 01:22:08 k Exp $
22
+ #
23
+
24
+ begin
25
+ require 'xmlparser'
26
+ rescue LoadError
27
+ end
28
+
29
module Bio
  class Blast
    class Report

      private

      # Parses BLAST XML output (-m 7) given as +xml+ using the XMLParser
      # library and stores the results in this report.
      #
      # The parser events are handled as follows:
      # * start of element: the element name is pushed on a stack and the
      #   event data is merged into the working hash; 'Iteration', 'Hit' and
      #   'Hsp' elements additionally create a new Iteration / Hit / Hsp
      #   object and append it to the current parent.
      # * character data: collected in the working hash under the name of the
      #   innermost open element.
      # * end of element: the collected values are handed to the matching
      #   xmlparser_parse_* helper and the working hash is cleared.
      #
      # An XMLParserError is not propagated; a message with the line and
      # column reported by the parser is printed instead.
      def xmlparser_parse(xml)
        parser = XMLParser.new
        # Singleton no-op handler for the parser's "default" callback.
        def parser.default; end

        begin
          open_tags = Array.new
          fields = Hash.new

          parser.parse(xml) do |type, name, data|
            case type
            when XMLParser::START_ELEM
              open_tags.push(name)
              fields.update(data)
              case name
              when 'Iteration'
                @iterations.push(Iteration.new)
              when 'Hit'
                new_hit = Hit.new
                new_hit.query_id = @query_id
                new_hit.query_def = @query_def
                new_hit.query_len = @query_len
                @iterations.last.hits.push(new_hit)
              when 'Hsp'
                @iterations.last.hits.last.hsps.push(Hsp.new)
              end
            when XMLParser::END_ELEM
              # Every handled element consumes the collected values, so the
              # working hash is replaced afterwards.
              consumed = true
              case name
              when /^BlastOutput/ then xmlparser_parse_program(name, fields)
              when /^Parameters$/ then xmlparser_parse_parameters(fields)
              when /^Iteration/   then xmlparser_parse_iteration(name, fields)
              when /^Hit/         then xmlparser_parse_hit(name, fields)
              when /^Hsp$/        then xmlparser_parse_hsp(fields)
              when /^Statistics$/ then xmlparser_parse_statistics(fields)
              else
                consumed = false
              end
              fields = Hash.new if consumed
              open_tags.pop
            when XMLParser::CDATA
              current = open_tags.last
              if fields[current].nil?
                # Whitespace-only text never starts a value.
                fields[current] = data unless data.strip.empty?
              else
                fields[current].concat(data) if data
              end
            when XMLParser::PI
              # processing instructions are ignored
            end
          end
        rescue XMLParserError
          print "Parse error at #{parser.line}(#{parser.column}) : #{$!}\n"
        end
      end


      # Stores the value of a BlastOutput_* element (named by +tag+) from
      # +hash+ into the corresponding instance variable.
      # Unknown tags are ignored.
      def xmlparser_parse_program(tag, hash)
        case tag
        when 'BlastOutput_program'   then @program   = hash[tag]
        when 'BlastOutput_version'   then @version   = hash[tag]
        when 'BlastOutput_reference' then @reference = hash[tag]
        when 'BlastOutput_db'        then @db        = hash[tag].strip
        when 'BlastOutput_query-ID'  then @query_id  = hash[tag]
        when 'BlastOutput_query-def' then @query_def = hash[tag]
        when 'BlastOutput_query-len' then @query_len = hash[tag].to_i
        end
      end

      # Copies the Parameters_* values found in +hash+ into @parameters.
      # 'filter' and 'matrix' are stored as String (to_s); every other
      # parameter is converted with to_i.
      def xmlparser_parse_parameters(hash)
        as_text = ['filter', 'matrix']
        [
          ['matrix',       'Parameters_matrix'],
          ['expect',       'Parameters_expect'],
          ['include',      'Parameters_include'],
          ['sc-match',     'Parameters_sc-match'],
          ['sc-mismatch',  'Parameters_sc-mismatch'],
          ['gap-open',     'Parameters_gap-open'],
          ['gap-extend',   'Parameters_gap-extend'],
          ['filter',       'Parameters_filter'],
          ['pattern',      'Parameters_pattern'],
          ['entrez-query', 'Parameters_entrez-query'],
        ].each do |key, label|
          if as_text.include?(key)
            @parameters[key] = hash[label].to_s
          else
            @parameters[key] = hash[label].to_i
          end
        end
      end

      # Stores the value of an Iteration_* element (named by +tag+) in the
      # most recently added iteration. Unknown tags are ignored.
      def xmlparser_parse_iteration(tag, hash)
        case tag
        when 'Iteration_iter-num' then @iterations.last.num = hash[tag].to_i
        when 'Iteration_message'  then @iterations.last.message = hash[tag].to_s
        end
      end

      # Stores the value of a Hit_* element (named by +tag+) in the most
      # recently added hit of the last iteration. Unknown tags are ignored.
      def xmlparser_parse_hit(tag, hash)
        current = @iterations.last.hits.last
        case tag
        when 'Hit_num'       then current.num        = hash[tag].to_i
        when 'Hit_id'        then current.hit_id     = hash[tag].clone
        when 'Hit_def'       then current.definition = hash[tag].clone
        when 'Hit_accession' then current.accession  = hash[tag].clone
        when 'Hit_len'       then current.len        = hash[tag].clone.to_i
        end
      end

      # Fills the most recently added Hsp of the last hit from the Hsp_*
      # values collected in +hash+.
      def xmlparser_parse_hsp(hash)
        current = @iterations.last.hits.last.hsps.last
        # [setter, key in hash, conversion (nil = store the value as is)]
        [
          [:num=,          'Hsp_num',          :to_i],
          [:bit_score=,    'Hsp_bit-score',    :to_f],
          [:score=,        'Hsp_score',        :to_i],
          [:evalue=,       'Hsp_evalue',       :to_f],
          [:query_from=,   'Hsp_query-from',   :to_i],
          [:query_to=,     'Hsp_query-to',     :to_i],
          [:hit_from=,     'Hsp_hit-from',     :to_i],
          [:hit_to=,       'Hsp_hit-to',       :to_i],
          [:pattern_from=, 'Hsp_pattern-from', :to_i],
          [:pattern_to=,   'Hsp_pattern-to',   :to_i],
          [:query_frame=,  'Hsp_query-frame',  :to_i],
          [:hit_frame=,    'Hsp_hit-frame',    :to_i],
          [:identity=,     'Hsp_identity',     :to_i],
          [:positive=,     'Hsp_positive',     :to_i],
          [:gaps=,         'Hsp_gaps',         :to_i],
          [:align_len=,    'Hsp_align-len',    :to_i],
          [:density=,      'Hsp_density',      :to_i],
          [:qseq=,         'Hsp_qseq',         nil],
          [:hseq=,         'Hsp_hseq',         nil],
          [:midline=,      'Hsp_midline',      nil],
        ].each do |setter, key, conversion|
          raw = hash[key]
          current.__send__(setter, conversion ? raw.__send__(conversion) : raw)
        end
      end

      # Copies the Statistics_* values found in +hash+ into the statistics
      # of the last iteration. 'db-num', 'db-len' and 'hsp-len' are converted
      # with to_i; the remaining values with to_f.
      def xmlparser_parse_statistics(hash)
        as_integer = ['db-num', 'db-len', 'hsp-len']
        [
          ['db-num',    'Statistics_db-num'],
          ['db-len',    'Statistics_db-len'],
          ['hsp-len',   'Statistics_hsp-len'],
          ['eff-space', 'Statistics_eff-space'],
          ['kappa',     'Statistics_kappa'],
          ['lambda',    'Statistics_lambda'],
          ['entropy',   'Statistics_entropy'],
        ].each do |key, label|
          if as_integer.include?(key)
            @iterations.last.statistics[key] = hash[label].to_i
          else
            @iterations.last.statistics[key] = hash[label].to_f
          end
        end
      end

    end
  end
end
216
+
217
+
218
+ =begin
219
+
220
+ This file is automatically loaded by bio/appl/blast/report.rb
221
+
222
+ =end
@@ -0,0 +1,392 @@
1
+ #
2
+ # = bio/appl/blat/report.rb - BLAT result parser
3
+ #
4
+ # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.6 2005/12/18 15:58:39 k Exp $
24
+ #
25
+ # BLAT result parser (psl / pslx format).
26
+ #
27
+ # == Important Notes
28
+ #
29
+ # In BLAT results, the start position of a sequence is numbered as 0.
30
+ # On the other hand, in many other homology search programs,
31
+ # the start position of a sequence is numbered as 1.
32
+ # To keep compatibility, the BLAT parser adds 1 to every position number.
33
+ #
34
+ # == References
35
+ #
36
+ # * Kent, W.J., BLAT--the BLAST-like alignment tool,
37
+ # Genome Research, 12, 656--664, 2002.
38
+ # http://www.genome.org/cgi/content/abstract/12/4/656
39
+ #
40
+
41
+ require 'bio'
42
+
43
module Bio
  class Blat

    # Bio::Blat::Report is a BLAT report parser class.
    # Its object may contain some Bio::Blat::Report::Hit objects.
    #
    # In BLAT results, the start position of a sequence is numbered as 0.
    # On the other hand, in many other homology search programs,
    # the start position of a sequence is numbered as 1.
    # To keep compatibility, the BLAT parser adds 1 to every position number.
    #
    # Note that Bio::Blat::Report#query_def, #query_id, #query_len methods
    # simply return first hit's query_*.
    # If multiple query sequences are given, these values
    # will be incorrect.
    #
    class Report #< DB
      # Delimiter of each entry. Bio::FlatFile uses it.
      # In Bio::Blat::Report, it is nil (1 entry 1 file).
      DELIMITER = RS = nil # 1 file 1 entry

      # Creates a new Bio::Blat::Report object from BLAT result text (String).
      # You can use Bio::FlatFile to read a file.
      # Currently, results created with options -out=psl (default) or
      # -out=pslx are supported.
      #
      # Lines before the dashed separator line are treated as header lines
      # and are used to build #columns; every line after it becomes a Hit.
      # If a line starting with a digit is seen before any separator, the
      # data is assumed to be headerless and that line is parsed as a hit.
      def initialize(text)
        in_body = false
        head = []
        @hits = []
        # String#each does not exist on Ruby >= 1.9; fall back to #each
        # only for objects that do not provide #each_line.
        iterator = text.respond_to?(:each_line) ? :each_line : :each
        text.__send__(iterator) do |line|
          unless in_body
            if /\A\d/ =~ line
              # headerless data: this line is already a hit
              in_body = true
            else
              header_line = line.chomp
              if /\A\-+\s*\z/ =~ header_line
                in_body = true
              else
                head << header_line
              end
              next
            end
          end
          @hits << Hit.new(line)
        end
        @columns = parse_header(head)
      end

      # hits of the result.
      # Returns an Array of Bio::Blat::Report::Hit objects.
      attr_reader :hits

      # Returns descriptions of columns.
      # Returns an Array.
      # This would be a Bio::Blat specific method.
      attr_reader :columns

      # Parses headers.
      # +ary+ is an Array of header lines; its first line is discarded.
      # The remaining lines are split on tabs and the cells of each column
      # are joined top to bottom: a cell ending with '-' is joined to the
      # next cell without a space, otherwise with a single space.
      # Returns an Array of column descriptions.
      def parse_header(ary)
        ary.shift # first line is removed
        rows = ary.collect { |row| row.split(/\t/) }
        names = []
        rows.each do |cells|
          cells.each_index do |i|
            word = cells[i].strip
            # a trailing hyphen marks a word continued on the next line
            piece = word.sub!(/\-\z/, '') ? word : word + ' '
            names[i] = names[i].to_s + piece
          end
        end
        names.each { |name| name.strip! }
        names
      end
      private :parse_header

      # Bio::Blat::Report::SeqDesc stores sequence information of
      # query or subject of the BLAT report.
      # It also includes some hit information.
      class SeqDesc
        # Creates a new SeqDesc object.
        # It is designed to be called internally from Bio::Blat::Report class.
        # Users shall not use it directly.
        def initialize(gap_count, gap_bases, name, size,
                       st, ed, starts, seqs)
          @gap_count = gap_count.to_i
          @gap_bases = gap_bases.to_i
          @name = name
          @size = size.to_i
          @start = st.to_i
          @end = ed.to_i
          @starts = starts.collect { |x| x.to_i }
          @seqs = seqs
        end
        # gap count
        attr_reader :gap_count
        # gap bases
        attr_reader :gap_bases
        # name of the sequence
        attr_reader :name
        # length of the sequence
        attr_reader :size
        # start position of the first segment
        attr_reader :start
        # end position of the final segment
        attr_reader :end
        # start positions of segments.
        # Returns an array of numbers.
        attr_reader :starts
        # sequences of segments.
        # Returns an array of String.
        # When created by Bio::Blat::Report::Hit from data without
        # sequence columns, this is an empty array.
        attr_reader :seqs
      end #class SeqDesc

      # Sequence segment pair of BLAT result.
      # Similar to Bio::Blast::Report::Hsp but lacks many methods.
      class SegmentPair
        # Creates a new SegmentPair object.
        # It is designed to be called internally from Bio::Blat::Report class.
        # Users shall not use it directly.
        #
        # +qstart+ and +tstart+ are the 0-based start positions taken from
        # the raw result, +blksize+ is the block length, and +qseq+ / +tseq+
        # are the query / target sequences of the block (or nil).
        def initialize(query_len, strand,
                       blksize, qstart, tstart, qseq, tseq)
          @blocksize = blksize
          @qseq = qseq
          # The target sequence is given as +tseq+. (Assigning from the
          # #hseq reader here would always store nil.)
          @hseq = tseq
          @hit_strand = 'plus'
          case strand
          when '-'
            # query is minus strand
            @query_strand = 'minus'
            # convert positions
            @query_from = query_len - qstart
            @query_to = query_len - qstart - blksize + 1
            # To keep compatibility with other homology search programs,
            # we add 1 to each position number.
            @hit_from = tstart + 1
            @hit_to = tstart + blksize # - 1 + 1
          else #when '+'
            @query_strand = 'plus'
            # To keep compatibility with other homology search programs,
            # we add 1 to each position number.
            @query_from = qstart + 1
            @query_to = qstart + blksize # - 1 + 1
            @hit_from = tstart + 1
            @hit_to = tstart + blksize # - 1 + 1
          end
        end
        # Returns query start position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :query_from

        # Returns query end position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :query_to

        # Returns query sequence.
        # If sequence data is not available, returns nil.
        attr_reader :qseq

        # Returns strand information of the query.
        # Returns 'plus' or 'minus'.
        attr_reader :query_strand

        # Returns target (subject, hit) start position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :hit_from

        # Returns target (subject, hit) end position.
        # CAUTION: In Blat's raw result(psl format), first position is 0.
        # To keep compatibility, the parser adds 1 to the position.
        attr_reader :hit_to

        # Returns the target (subject, hit) sequence.
        # If sequence data is not available, returns nil.
        attr_reader :hseq

        # Returns strand information of the target (subject, hit).
        # This parser always sets it to 'plus'.
        attr_reader :hit_strand

        # Returns block size (length) of the segment pair.
        # This would be a Bio::Blat specific method.
        attr_reader :blocksize

        # Returns alignment length of the segment pair.
        # Returns nil if no alignment data are available.
        def align_len
          @qseq ? @qseq.size : nil
        end
      end #class SegmentPair

      # Hit class for the BLAT result parser.
      # Similar to Bio::Blast::Report::Hit but lacks many methods.
      # Its object may contain some Bio::Blat::Report::SegmentPair objects.
      class Hit
        # Creates a new Hit object from a piece of BLAT result text.
        # It is designed to be called internally from Bio::Blat::Report object.
        # Users shall not use it directly.
        def initialize(str)
          @data = str.chomp.split(/\t/)
        end

        # Raw data of the hit (tab-separated fields of the line).
        # (Note that it doesn't add 1 to position numbers.)
        attr_reader :data

        # Splits comma-separated text into an Array of String.
        # Trailing commas are removed first; nil is treated as ''.
        def split_comma(str)
          str.to_s.sub(/\s*\,+\s*\z/, '').split(/\s*\,\s*/)
        end
        private :split_comma

        # Returns sequence informations of the query.
        # Returns a Bio::Blat::Report::SeqDesc object.
        # This would be Bio::Blat specific method.
        def query
          unless defined?(@query)
            d = @data
            @query = SeqDesc.new(d[4], d[5], d[9], d[10], d[11], d[12],
                                 split_comma(d[19]), split_comma(d[21]))
          end
          @query
        end

        # Returns sequence informations of the target(hit).
        # Returns a Bio::Blat::Report::SeqDesc object.
        # This would be Bio::Blat specific method.
        def target
          unless defined?(@target)
            d = @data
            @target = SeqDesc.new(d[6], d[7], d[13], d[14], d[15], d[16],
                                  split_comma(d[20]), split_comma(d[22]))
          end
          @target
        end

        # Match nucleotides.
        def match;     @data[0].to_i; end
        # Mismatch nucleotides.
        def mismatch;  @data[1].to_i; end
        # rep. match (???)
        def rep_match; @data[2].to_i; end
        # N's (???)
        def n_s;       @data[3].to_i; end

        # Returns strand information of the hit.
        # Returns '+' or '-'.
        # This would be a Bio::Blat specific method.
        def strand;    @data[8]; end

        # Number of blocks(exons, segment pairs).
        def block_count; @data[17].to_i; end

        # Sizes of all blocks(exons, segment pairs).
        # Returns an array of numbers.
        def block_sizes
          unless defined?(@block_sizes) then
            @block_sizes = split_comma(@data[18]).collect { |x| x.to_i }
          end
          @block_sizes
        end

        # Returns blocks(exons, segment pairs) of the hit.
        # Returns an array of Bio::Blat::Report::SegmentPair objects.
        def blocks
          unless defined?(@blocks)
            bs    = block_sizes
            qst   = query.starts
            tst   = target.starts
            qseqs = query.seqs
            tseqs = target.seqs
            @blocks = (0...block_count).collect do |i|
              SegmentPair.new(query.size, strand, bs[i],
                              qst[i], tst[i], qseqs[i], tseqs[i])
            end
          end
          @blocks
        end
        alias exons blocks

        #--
        # Bio::BLAST::*::Report::Hit compatible methods
        #++
        alias hsps blocks

        # Returns the length of query sequence.
        def query_len;  query.size;  end

        # Returns the name of query sequence.
        def query_def;  query.name;  end
        alias query_id query_def

        # Returns the length of the target(subject) sequence.
        def target_len; target.size; end
        alias len target_len

        # Returns the name of the target(subject) sequence.
        def target_def; target.name; end
        alias target_id target_def
        alias definition target_def

        # Iterates over each block(exon, segment pair) of the hit.
        # Yields a Bio::Blat::Report::SegmentPair object.
        def each(&x) #:yields: segmentpair
          exons.each(&x)
        end
      end #class Hit

      #--
      # Bio::BLAST::*::Report compatible methods
      #++

      # Returns number of hits.
      # Same as hits.size.
      def num_hits;     @hits.size;     end

      # Iterates over each Bio::Blat::Report::Hit object.
      # Same as hits.each.
      def each_hit(&x) #:yields: hit
        @hits.each(&x)
      end
      alias each each_hit

      # Returns the name of query sequence.
      # CAUTION: query_* methods simply return first hit's query_*.
      # If multiple query sequences are given, these values
      # will be incorrect.
      def query_def; (x = @hits.first) ? x.query_def : nil; end

      # Returns the length of query sequence.
      # CAUTION: query_* methods simply return first hit's query_*.
      # If multiple query sequences are given, these values
      # will be incorrect.
      def query_len; (x = @hits.first) ? x.query_len : nil; end
      alias query_id query_def
    end #class Report

  end #class Blat
end #module Bio
+
380
+ =begin
381
+
382
+ = Bio::Blat::Report
383
+
384
+ BLAT result parser. (psl / pslx format)
385
+
386
+ = References
387
+
388
+ * ((<URL:http://www.genome.org/cgi/content/abstract/12/4/656>))
389
+ Kent, W.J., BLAT--the BLAST-like alignment tool,
390
+ Genome Research, 12, 656--664, 2002.
391
+
392
+ =end