bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,1906 @@
1
+ #
2
+ # = bio/alignment.rb - multiple alignment of sequences
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2005
5
+ # GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
6
+ #
7
+ # License:: LGPL
8
+ #
9
+ # $Id: alignment.rb,v 1.14 2005/12/02 12:01:28 ngoto Exp $
10
+ #
11
+ #--
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #++
26
+ #
27
+ # = About Bio::Alignment
28
+ #
29
+ # Please refer to the documentation of the Bio::Alignment module.
30
+ #
31
+ # = References
32
+ #
33
+ # * Bio::Align::AlignI class of the BioPerl.
34
+ # http://doc.bioperl.org/releases/bioperl-1.4/Bio/Align/AlignI.html
35
+ #
36
+ # * Bio::SimpleAlign class of the BioPerl.
37
+ # http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
38
+ #
39
+
40
+ require 'bio/sequence'
41
+
42
+ module Bio
43
+
44
+ =begin rdoc
45
+
46
+ = About Bio::Alignment
47
+
48
+ Bio::Alignment is a namespace of classes/modules for multiple sequence
49
+ alignment.
50
+
51
+ = Multiple alignment container classes
52
+
53
+ == Bio::Alignment::OriginalAlignment
54
+
55
+ == Bio::Alignment::SequenceArray
56
+
57
+ == Bio::Alignment::SequenceHash
58
+
59
+ = Bio::Alignment::Site
60
+
61
+ = Modules
62
+
63
+ == Bio::Alignment::EnumerableExtension
64
+
65
+ Mix-in for classes that include Enumerable.
66
+
67
+ == Bio::Alignment::ArrayExtension
68
+
69
+ Mix-in for Array or Array-like classes.
70
+
71
+ == Bio::Alignment::HashExtension
72
+
73
+ Mix-in for Hash or Hash-like classes.
74
+
75
+ == Bio::Alignment::SiteMethods
76
+
77
+ == Bio::Alignment::PropertyMethods
78
+
79
+ = Bio::Alignment::GAP
80
+
81
+ = Compatibility from older BioRuby
82
+
83
+ =end
84
+ module Alignment
85
+
86
+ # Bio::Alignment::PropertyMethods is a set of methods to treat
87
+ # the gap character and so on.
88
+ module PropertyMethods
89
+ # regular expression for detecting gaps.
90
+ GAP_REGEXP = /[^a-zA-Z]/
91
+ # gap character
92
+ GAP_CHAR = '-'.freeze
93
+ # missing character
94
+ MISSING_CHAR = '?'.freeze
95
+
96
+ # If given character is a gap, returns true.
97
+ # Otherwise, return false.
98
+ # Note that <em>s</em> must be a String which contain a single character.
99
+ def is_gap?(s)
100
+ (gap_regexp =~ s) ? true : false
101
+ end
102
+
103
+ # Returns regular expression for checking gap.
104
+ def gap_regexp
105
+ @gap_regexp or GAP_REGEXP
106
+ end
107
+ # regular expression for checking gap
108
+ attr_writer :gap_regexp
109
+
110
+ # Gap character.
111
+ def gap_char
112
+ @gap_char or GAP_CHAR
113
+ end
114
+ # gap character
115
+ attr_writer :gap_char
116
+
117
+ # Character if the site is missing or unknown.
118
+ def missing_char
119
+ @missing_char or MISSING_CHAR
120
+ end
121
+ # Character if the site is missing or unknown.
122
+ attr_writer :missing_char
123
+
124
+ # Returns class of the sequence.
125
+ # If instance variable @seqclass (which can be
126
+ # set by 'seqclass=' method) is set, simply returns the value.
127
+ # Otherwise, returns String.
128
+ # (This implementation does not inspect the sequences.)
129
+ def seqclass
130
+ @seqclass or String
131
+ end
132
+
133
+ # The class of the sequence.
134
+ # The value must be String or its derivatives.
135
+ attr_writer :seqclass
136
+
137
+ # Returns properties defined in the object as a hash.
138
+ def get_all_property
139
+ ret = {}
140
+ if defined? @gap_regexp
141
+ ret[:gap_regexp] = @gap_regexp
142
+ end
143
+ if defined? @gap_char
144
+ ret[:gap_char] = @gap_char
145
+ end
146
+ if defined? @missing_char
147
+ ret[:missing_char] = @missing_char
148
+ end
149
+ if defined? @seqclass
150
+ ret[:seqclass] = @seqclass
151
+ end
152
+ ret
153
+ end
154
+
155
+ # Sets properties from given hash.
156
+ # <em>hash</em> would be a return value of <tt>get_all_property</tt> method.
157
+ def set_all_property(hash)
158
+ @gap_regexp = hash[:gap_regexp] if hash.has_key?(:gap_regexp)
159
+ @gap_char = hash[:gap_char] if hash.has_key?(:gap_char)
160
+ @missing_char = hash[:missing_char] if hash.has_key?(:missing_char)
161
+ @seqclass = hash[:seqclass] if hash.has_key?(:seqclass)
162
+ self
163
+ end
164
+ end #module PropertyMethods
165
+
166
+ # Bio::Alignment::SiteMethods is a set of methods for
167
+ # Bio::Alignment::Site.
168
+ # It can also be used for extending an array of single-letter strings.
169
+ module SiteMethods
170
+ include PropertyMethods
171
+
172
+ # If there are gaps, returns true. Otherwise, returns false.
173
+ def has_gap?
174
+ (find { |x| is_gap?(x) }) ? true : false
175
+ end
176
+
177
+ # Removes gaps in the site. (destructive method)
178
+ def remove_gaps!
179
+ flag = nil
180
+ self.collect! do |x|
181
+ if is_gap?(x) then flag = self; nil; else x; end
182
+ end
183
+ self.compact!
184
+ flag
185
+ end
186
+
187
+ # Returns consensus character of the site.
188
+ # If consensus is found, returns a single-letter string.
189
+ # If not, returns nil.
190
+ def consensus_string(threshold = 1.0)
191
+ return nil if self.size <= 0
192
+ return self[0] if self.sort.uniq.size == 1
193
+ h = Hash.new(0)
194
+ self.each { |x| h[x] += 1 }
195
+ total = self.size
196
+ b = h.to_a.sort do |x,y|
197
+ z = (y[1] <=> x[1])
198
+ z = (self.index(x[0]) <=> self.index(y[0])) if z == 0
199
+ z
200
+ end
201
+ if total * threshold <= b[0][1] then
202
+ b[0][0]
203
+ else
204
+ nil
205
+ end
206
+ end
207
+
208
+ # IUPAC nucleotide groups. Internal use only.
209
+ IUPAC_NUC = [
210
+ %w( t u ),
211
+ %w( m a c ),
212
+ %w( r a g ),
213
+ %w( w a t u ),
214
+ %w( s c g ),
215
+ %w( y c t u ),
216
+ %w( k g t u ),
217
+ %w( v a c g m r s ),
218
+ %w( h a c t u m w y ),
219
+ %w( d a g t u r w k ),
220
+ %w( b c g t u s y k ),
221
+ %w( n a c g t u m r w s y k v h d b )
222
+ ]
223
+
224
+ # Returns an IUPAC consensus base for the site.
225
+ # If consensus is found, returns a single-letter string.
226
+ # If not, returns nil.
227
+ def consensus_iupac
228
+ a = self.collect { |x| x.downcase }.sort.uniq
229
+ if a.size == 1 then
230
+ case a[0]
231
+ when 'a', 'c', 'g', 't'
232
+ a[0]
233
+ when 'u'
234
+ 't'
235
+ else
236
+ IUPAC_NUC.find { |x| a[0] == x[0] } ? a[0] : nil
237
+ end
238
+ elsif r = IUPAC_NUC.find { |x| (a - x).size <= 0 } then
239
+ r[0]
240
+ else
241
+ nil
242
+ end
243
+ end
244
+
245
+ # Table of strongly conserved amino-acid groups.
246
+ #
247
+ # The value of the tables are taken from BioPerl
248
+ # (Bio/SimpleAlign.pm in BioPerl 1.0),
249
+ # and the BioPerl's document says that
250
+ # it is taken from Clustalw documentation and
251
+ # These are all the positively scoring groups that occur in the
252
+ # Gonnet Pam250 matrix. The strong and weak groups are
253
+ # defined as strong score >0.5 and weak score =<0.5 respectively.
254
+ #
255
+ StrongConservationGroups = %w(STA NEQK NHQK NDEQ QHRK MILV MILF
256
+ HY FYW).collect { |x| x.split('').sort }
257
+
258
+ # Table of weakly conserved amino-acid groups.
259
+ #
260
+ # Please refer StrongConservationGroups document
261
+ # for the origin of the table.
262
+ WeakConservationGroups = %w(CSA ATV SAG STNK STPA SGND SNDEQK
263
+ NDEQHK NEQHRK FVLIM HFY).collect { |x| x.split('').sort }
264
+
265
+ # Returns the match-line character for the site.
266
+ # This is amino-acid version.
267
+ def match_line_amino(opt = {})
268
+ # opt[:match_line_char] ==> 100% equal default: '*'
269
+ # opt[:strong_match_char] ==> strong match default: ':'
270
+ # opt[:weak_match_char] ==> weak match default: '.'
271
+ # opt[:mismatch_char] ==> mismatch default: ' '
272
+ mlc = (opt[:match_line_char] or '*')
273
+ smc = (opt[:strong_match_char] or ':')
274
+ wmc = (opt[:weak_match_char] or '.')
275
+ mmc = (opt[:mismatch_char] or ' ')
276
+ a = self.collect { |c| c.upcase }.sort.uniq
277
+ a.extend(SiteMethods)
278
+ if a.has_gap? then
279
+ mmc
280
+ elsif a.size == 1 then
281
+ mlc
282
+ elsif StrongConservationGroups.find { |x| (a - x).empty? } then
283
+ smc
284
+ elsif WeakConservationGroups.find { |x| (a - x).empty? } then
285
+ wmc
286
+ else
287
+ mmc
288
+ end
289
+ end
290
+
291
+ # Returns the match-line character for the site.
292
+ # This is nucleic-acid version.
293
+ def match_line_nuc(opt = {})
294
+ # opt[:match_line_char] ==> 100% equal default: '*'
295
+ # opt[:mismatch_char] ==> mismatch default: ' '
296
+ mlc = (opt[:match_line_char] or '*')
297
+ mmc = (opt[:mismatch_char] or ' ')
298
+ a = self.collect { |c| c.upcase }.sort.uniq
299
+ a.extend(SiteMethods)
300
+ if a.has_gap? then
301
+ mmc
302
+ elsif a.size == 1 then
303
+ mlc
304
+ else
305
+ mmc
306
+ end
307
+ end
308
+ end #module SiteMethods
309
+
310
+ # Bio::Alignment::Site stores bases or amino-acids in a
311
+ # site of the alignment.
312
+ # It would store multiple String objects of length 1.
313
+ # Please refer to the document of Array and SiteMethods for methods.
314
+ class Site < Array
315
+ include SiteMethods
316
+ end #class Site
317
+
318
+ # The module Bio::Alignment::EnumerableExtension is a set of useful
319
+ # methods for multiple sequence alignment.
320
+ # It can be included by any classes or can be extended to any objects.
321
+ # The classes or objects must have methods defined in Enumerable,
322
+ # and must have the <tt>each</tt> method
323
+ # which iterates over each sequence (or string) and yields
324
+ # a sequence (or string) object.
325
+ #
326
+ # Optionally, if <tt>each_seq</tt> method is defined,
327
+ # which iterates over each sequence (or string) and yields
328
+ # each sequence (or string) object, it is used instead of <tt>each</tt>.
329
+ #
330
+ # Note that the <tt>each</tt> or <tt>each_seq</tt> method would be
331
+ # called multiple times.
332
+ # This means that the module is not suitable for IO objects.
333
+ # In addition, <tt>break</tt> would be used in the given block and
334
+ # destructive methods would be used to the sequences.
335
+ #
336
+ # For Array or Hash objects, you had better use
337
+ # ArrayExtension or HashExtension modules, respectively.
338
+ # They would have built-in <tt>each_seq</tt> method and/or
339
+ # some methods would be redefined.
340
+ #
341
+ module EnumerableExtension
342
+ include PropertyMethods
343
+
344
+ # Iterates over each sequence.
345
+ # Yields a sequence.
346
+ # It acts the same as Enumerable#each.
347
+ #
348
+ # You would redefine the method suitable for the class/object.
349
+ def each_seq(&block) #:yields: seq
350
+ each(&block)
351
+ end
352
+
353
+ # Returns class of the sequence.
354
+ # If instance variable @seqclass (which can be
355
+ # set by 'seqclass=' method) is set, simply returns the value.
356
+ # Otherwise, returns the first sequence's class.
357
+ # If no sequences are found, returns String.
358
+ def seqclass
359
+ if @seqclass then
360
+ @seqclass
361
+ else
362
+ klass = nil
363
+ each_seq do |s|
364
+ if s then
365
+ klass = s.class
366
+ break if klass
367
+ end
368
+ end
369
+ (klass or String)
370
+ end
371
+ end
372
+
373
+ # Returns the alignment length.
374
+ # Returns the longest length of the sequence in the alignment.
375
+ def alignment_length
376
+ maxlen = 0
377
+ each_seq do |s|
378
+ x = s.length
379
+ maxlen = x if x > maxlen
380
+ end
381
+ maxlen
382
+ end
383
+ alias seq_length alignment_length
384
+
385
+ # Gets a site of the position.
386
+ # Returns a Bio::Alignment::Site object.
387
+ #
388
+ # If the position is out of range, it returns the site
389
+ # of which all are gaps.
390
+ #
391
+ # It is a private method.
392
+ # Only difference from public alignment_site method is
393
+ # it does not do <tt>set_all_property(get_all_property)</tt>.
394
+ def _alignment_site(position)
395
+ site = Site.new
396
+ each_seq do |s|
397
+ c = s[position, 1]
398
+ if c.to_s.empty?
399
+ c = seqclass.new(gap_char)
400
+ end
401
+ site << c
402
+ end
403
+ site
404
+ end
405
+ private :_alignment_site
406
+
407
+ # Gets a site of the position.
408
+ # Returns a Bio::Alignment::Site object.
409
+ #
410
+ # If the position is out of range, it returns the site
411
+ # of which all are gaps.
412
+ def alignment_site(position)
413
+ site = _alignment_site(position)
414
+ site.set_all_property(get_all_property)
415
+ site
416
+ end
417
+
418
+ # Iterates over each site of the alignment.
419
+ # It yields a Bio::Alignment::Site object (which inherits Array).
420
+ # It returns self.
421
+ def each_site
422
+ cp = get_all_property
423
+ (0...alignment_length).each do |i|
424
+ site = _alignment_site(i)
425
+ site.set_all_property(cp)
426
+ yield(site)
427
+ end
428
+ self
429
+ end
430
+
431
+ # Iterates over each site of the alignment, with specifying
432
+ # start, stop positions and step.
433
+ # It yields Bio::Alignment::Site object (which inherits Array).
434
+ # It returns self.
435
+ # It is same as
436
+ # <tt>start.step(stop, step) { |i| yield alignment_site(i) }</tt>.
437
+ def each_site_step(start, stop, step = 1)
438
+ cp = get_all_property
439
+ start.step(stop, step) do |i|
440
+ site = _alignment_site(i)
441
+ site.set_all_property(cp)
442
+ yield(site)
443
+ end
444
+ self
445
+ end
446
+
447
+ # Iterates over each sequence and results running blocks
448
+ # are collected and returns a new alignment as a
449
+ # Bio::Alignment::SequenceArray object.
450
+ #
451
+ # Note that it would be redefined if you want to change
452
+ # return value's class.
453
+ #
454
+ def alignment_collect
455
+ a = SequenceArray.new
456
+ a.set_all_property(get_all_property)
457
+ each_seq do |str|
458
+ a << yield(str)
459
+ end
460
+ a
461
+ end
462
+
463
+ # Returns specified range of the alignment.
464
+ # For each sequence, the '[]' method (it may be String#[])
465
+ # is executed, and returns a new alignment
466
+ # as a Bio::Alignment::SequenceArray object.
467
+ #
468
+ # Unlike alignment_slice method, the resulting alignment is
469
+ # guaranteed to contain String objects even if the specified range
470
+ # is out of range.
471
+ #
472
+ # If you want to change return value's class, you should redefine
473
+ # alignment_collect method.
474
+ #
475
+ def alignment_window(*arg)
476
+ alignment_collect do |s|
477
+ s[*arg] or seqclass.new('')
478
+ end
479
+ end
480
+ alias window alignment_window
481
+
482
+ # Iterates over each sliding window of the alignment.
483
+ # window_size is the size of sliding window.
484
+ # step is the step of each sliding.
485
+ # It yields a Bio::Alignment::SequenceArray object which contains
486
+ # each sliding window.
487
+ # It returns a Bio::Alignment::SequenceArray object which contains
488
+ # remainder alignment at the terminal end.
489
+ # If window_size is smaller than 0, it returns nil.
490
+ def each_window(window_size, step_size = 1)
491
+ return nil if window_size < 0
492
+ if step_size >= 0 then
493
+ i = nil
494
+ 0.step(alignment_length - window_size, step_size) do |i|
495
+ yield alignment_window(i, window_size)
496
+ end
497
+ alignment_window((i+window_size)..-1)
498
+ else
499
+ i = alignment_length - window_size
500
+ while i >= 0
501
+ yield alignment_window(i, window_size)
502
+ i += step_size
503
+ end
504
+ alignment_window(0...(i-step_size))
505
+ end
506
+ end
507
+
508
+ # Iterates over each site of the alignment and results running the
509
+ # block are collected and returns an array.
510
+ # It yields a Bio::Alignment::Site object.
511
+ def collect_each_site
512
+ ary = []
513
+ each_site do |site|
514
+ ary << yield(site)
515
+ end
516
+ ary
517
+ end
518
+
519
+ # Helper method for calculating consensus sequence.
520
+ # It iterates over each site of the alignment.
521
+ # In each site, gaps will be removed if specified with opt.
522
+ # It yields a Bio::Alignment::Site object.
523
+ # Results running the block (String objects are expected)
524
+ # are joined to a string and it returns the string.
525
+ #
526
+ # opt[:gap_mode] ==> 0 -- gaps are regarded as normal characters
527
+ # 1 -- a site within gaps is regarded as a gap
528
+ # -1 -- gaps are eliminated from consensus calculation
529
+ # default: 0
530
+ #
531
+ def consensus_each_site(opt = {})
532
+ mchar = (opt[:missing_char] or self.missing_char)
533
+ gap_mode = opt[:gap_mode]
534
+ case gap_mode
535
+ when 0, nil
536
+ collect_each_site do |a|
537
+ yield(a) or mchar
538
+ end.join('')
539
+ when 1
540
+ collect_each_site do |a|
541
+ a.has_gap? ? gap_char : (yield(a) or mchar)
542
+ end.join('')
543
+ when -1
544
+ collect_each_site do |a|
545
+ a.remove_gaps!
546
+ a.empty? ? gap_char : (yield(a) or mchar)
547
+ end.join('')
548
+ else
549
+ raise ':gap_mode must be 0, 1 or -1'
550
+ end
551
+ end
552
+
553
+ # Returns the consensus string of the alignment.
554
+ # 0.0 <= threshold <= 1.0 is expected.
555
+ #
556
+ # It resembles the BioPerl's AlignI::consensus_string method.
557
+ #
558
+ # Please refer to the consensus_each_site method for opt.
559
+ #
560
+ def consensus_string(threshold = 1.0, opt = {})
561
+ consensus_each_site(opt) do |a|
562
+ a.consensus_string(threshold)
563
+ end
564
+ end
565
+
566
+ # Returns the IUPAC consensus string of the alignment
567
+ # of nucleic-acid sequences.
568
+ #
569
+ # It resembles the BioPerl's AlignI::consensus_iupac method.
570
+ #
571
+ # Please refer to the consensus_each_site method for opt.
572
+ #
573
+ def consensus_iupac(opt = {})
574
+ consensus_each_site(opt) do |a|
575
+ a.consensus_iupac
576
+ end
577
+ end
578
+
579
+ # Returns the match line string of the alignment
580
+ # of amino-acid sequences.
581
+ #
582
+ # It resembles the BioPerl's AlignI::match_line method.
583
+ #
584
+ # opt[:match_line_char] ==> 100% equal default: '*'
585
+ # opt[:strong_match_char] ==> strong match default: ':'
586
+ # opt[:weak_match_char] ==> weak match default: '.'
587
+ # opt[:mismatch_char] ==> mismatch default: ' '
588
+ #
589
+ # More opt can be accepted.
590
+ # Please refer to the consensus_each_site method for opt.
591
+ #
592
+ def match_line_amino(opt = {})
593
+ collect_each_site do |a|
594
+ a.match_line_amino(opt)
595
+ end.join('')
596
+ end
597
+
598
+ # Returns the match line string of the alignment
599
+ # of nucleic-acid sequences.
600
+ #
601
+ # It resembles the BioPerl's AlignI::match_line method.
602
+ #
603
+ # opt[:match_line_char] ==> 100% equal default: '*'
604
+ # opt[:mismatch_char] ==> mismatch default: ' '
605
+ #
606
+ # More opt can be accepted.
607
+ # Please refer to the consensus_each_site method for opt.
608
+ #
609
+ def match_line_nuc(opt = {})
610
+ collect_each_site do |a|
611
+ a.match_line_nuc(opt)
612
+ end.join('')
613
+ end
614
+
615
+ # Returns the match line string of the alignment
616
+ # of nucleic- or amino-acid sequences.
617
+ # The type of the sequence is automatically determined
618
+ # or you can specify with opt[:type].
619
+ #
620
+ # It resembles the BioPerl's AlignI::match_line method.
621
+ #
622
+ # opt[:type] ==> :na or :aa (or determined by sequence class)
623
+ # opt[:match_line_char] ==> 100% equal default: '*'
624
+ # opt[:strong_match_char] ==> strong match default: ':'
625
+ # opt[:weak_match_char] ==> weak match default: '.'
626
+ # opt[:mismatch_char] ==> mismatch default: ' '
627
+ # :strong_ and :weak_match_char are used only in amino mode (:aa)
628
+ #
629
+ # More opt can be accepted.
630
+ # Please refer to the consensus_each_site method for opt.
631
+ #
632
+ def match_line(opt = {})
633
+ case opt[:type]
634
+ when :aa
635
+ amino = true
636
+ when :na, :dna, :rna
637
+ amino = false
638
+ else
639
+ if seqclass == Bio::Sequence::AA then
640
+ amino = true
641
+ elsif seqclass == Bio::Sequence::NA then
642
+ amino = false
643
+ elsif self.find { |x| /[EFILPQ]/i =~ x } then
644
+ amino = true
645
+ else
646
+ amino = nil
647
+ end
648
+ end
649
+ if amino then
650
+ match_line_amino(opt)
651
+ else
652
+ match_line_nuc(opt)
653
+ end
654
+ end
655
+
656
+ # This is the BioPerl's AlignI::match like method.
657
+ #
658
+ # Changes second to last sequences' sites to match_char(default: '.')
659
+ # when a site is equal to the first sequence's corresponding site.
660
+ #
661
+ # Note that it is a destructive method.
662
+ #
663
+ # For Hash, please use it carefully because
664
+ # the order of the sequences is inconstant.
665
+ #
666
+ def convert_match(match_char = '.')
667
+ #(BioPerl) AlignI::match like method
668
+ len = alignment_length
669
+ firstseq = nil
670
+ each_seq do |s|
671
+ unless firstseq then
672
+ firstseq = s
673
+ else
674
+ (0...len).each do |i|
675
+ if s[i] and firstseq[i] == s[i] and !is_gap?(firstseq[i..i])
676
+ s[i..i] = match_char
677
+ end
678
+ end
679
+ end
680
+ end
681
+ self
682
+ end
683
+
684
+ # This is the BioPerl's AlignI::unmatch like method.
685
+ #
686
+ # Changes second to last sequences' sites match_char(default: '.')
687
+ # to original sites' characters.
688
+ #
689
+ # Note that it is a destructive method.
690
+ #
691
+ # For Hash, please use it carefully because
692
+ # the order of the sequences is inconstant.
693
+ #
694
+ def convert_unmatch(match_char = '.')
695
+ #(BioPerl) AlignI::unmatch like method
696
+ len = alignment_length
697
+ firstseq = nil
698
+ each_seq do |s|
699
+ unless firstseq then
700
+ firstseq = s
701
+ else
702
+ (0...len).each do |i|
703
+ if s[i..i] == match_char then
704
+ s[i..i] = (firstseq[i..i] or match_char)
705
+ end
706
+ end
707
+ end
708
+ end
709
+ self
710
+ end
711
+
712
+ # Fills gaps to the tail of each sequence if the length of
713
+ # the sequence is shorter than the alignment length.
714
+ #
715
+ # Note that it is a destructive method.
716
+ def alignment_normalize!
717
+ #(original)
718
+ len = alignment_length
719
+ each_seq do |s|
720
+ s << (gap_char * (len - s.length)) if s.length < len
721
+ end
722
+ self
723
+ end
724
+ alias normalize! alignment_normalize!
725
+
726
# Removes trailing columns that consist only of gaps.
# Columns are inspected from the last one towards the first, and
# scanning stops at the first column that still holds a non-gap site.
#
# Returns nil if nothing was removed, otherwise returns self.
#
# Note that it is a destructive method.
def alignment_rstrip!
  #(String-like)
  full = alignment_length
  keep = full
  each_site_step(full - 1, 0, -1) do |site|
    site.remove_gaps!
    break unless site.empty?
    keep -= 1
  end
  return nil unless keep < full
  each_seq do |seq|
    # Sequences already shorter than the new length are left alone.
    seq[keep..-1] = '' if seq.length > keep
  end
  self
end
alias rstrip! alignment_rstrip!
750
+
751
# Removes leading columns that consist only of gaps.
# Columns are inspected from the first one onwards, and scanning
# stops at the first column that holds a non-gap site.
#
# Returns nil if nothing was removed, otherwise returns self.
#
# Note that it is a destructive method.
def alignment_lstrip!
  #(String-like)
  leading = 0
  each_site do |site|
    site.remove_gaps!
    break unless site.empty?
    leading += 1
  end
  return nil unless leading > 0
  each_seq { |seq| seq[0, leading] = '' }
  self
end
alias lstrip! alignment_lstrip!
772
+
773
# Removes gap-only columns from both ends of the alignment by
# running alignment_rstrip! and then alignment_lstrip!
# (both are always executed).
#
# Returns nil if neither of them removed anything,
# otherwise returns self.
#
# Note that it is a destructive method.
def alignment_strip!
  #(String-like)
  right = alignment_rstrip!
  left = alignment_lstrip!
  right || left
end
alias strip! alignment_strip!
785
+
786
# Deletes every gap (anything matching gap_regexp) from every sequence.
#
# Returns nil if no sequence was modified, otherwise returns self.
#
# Note that it is a destructive method.
def remove_all_gaps!
  changed = false
  each_seq do |seq|
    # gsub! returns nil when it made no substitution.
    changed = true if seq.gsub!(gap_regexp, '')
  end
  changed ? self : nil
end
799
+
800
# Returns the specified range of the alignment.
# The 'slice' method of each sequence (it may be String#slice,
# which is the same as String#[]) is called with the given arguments,
# and the results are gathered by alignment_collect into a new
# alignment (a Bio::Alignment::SequenceArray object by default).
#
# Unlike alignment_window method, the result alignment
# might contain nil.
#
# If you want to change return value's class, you should redefine
# alignment_collect method.
#
def alignment_slice(*arg)
  #(String-like)
  #(BioPerl) AlignI::slice like method
  alignment_collect { |seq| seq.slice(*arg) }
end
alias slice alignment_slice
819
+
820
# Calls the 'subseq' method (Bio::Sequence#subseq is expected)
# of each sequence with the given arguments, and gathers the results
# by alignment_collect into a new alignment
# (a Bio::Alignment::SequenceArray object by default).
#
# All sequences in the alignment are expected to be kind of
# Bio::Sequence objects.
#
# Unlike alignment_window method, the result alignment
# might contain nil.
#
# If you want to change return value's class, you should redefine
# alignment_collect method.
#
def alignment_subseq(*arg)
  #(original)
  alignment_collect { |seq| seq.subseq(*arg) }
end
alias subseq alignment_subseq
840
+
841
# Concatenates the given alignment, sequence by sequence:
# the n-th sequence of <em>align</em> is appended to the n-th
# sequence of self. Surplus sequences on either side and nil
# entries are skipped.
# <em>align</em> must have <tt>each_seq</tt>
# or <tt>each</tt> method.
#
# Returns self.
#
# Note that it is a destructive method.
#
# For Hash, please use it carefully because
# the order of the sequences is inconstant and
# key information is completely ignored.
#
def alignment_concat(align)
  own = []
  each_seq { |seq| own << seq }
  pos = 0
  yielded = nil
  begin
    align.each_seq do |piece|
      yielded = true
      own[pos].concat(piece) if own[pos] && piece
      pos += 1
    end
    return self
  rescue NoMethodError, ArgumentError => err
    # Before anything was yielded, the error is taken to mean that
    # align cannot be iterated with each_seq, and each is tried below.
    # After that point it is a genuine failure and is re-raised.
    raise err if yielded
  end
  align.each do |piece|
    own[pos].concat(piece) if own[pos] && piece
    pos += 1
  end
  self
end
874
+ end #module EnumerableExtension
875
+
876
# ClustalWFormatter is a module to create ClustalW-formatted text
# from an alignment object.
#
# All of its methods are private helpers meant to be called from
# the to_clustal method of the including class.
#
# It will be obsoleted and the methods will be frequently changed.
module ClustalWFormatter
  # Checks whether there are same names.
  #
  # Each name is reduced to its first word (split at whitespace or NUL),
  # cut to <em>len</em> characters, and the characters <tt>: ; , ( )</tt>
  # are replaced with '_' before comparison.
  #
  # array:: names of the sequences (array of string)
  # len::   length to check (default:30)
  #
  # Returns a sorted array of the indices of all names that collide
  # with another name, or false when every name is unique.
  def have_same_name?(array, len = 30)
    na30 = array.collect do |k|
      # A character class is needed here: the pattern without brackets
      # only matched the literal five-character sequence ":;,()".
      k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/[\:\;\,\(\)]/, '_').to_s
    end
    # Group indices by normalized name; groups of two or more are duplicates.
    groups = Hash.new { |h, name| h[name] = [] }
    na30.each_with_index { |name, i| groups[name] << i }
    dupidx = groups.values.select { |idxs| idxs.size > 1 }.flatten.sort
    dupidx.empty? ? false : dupidx
  end
  private :have_same_name?

  # Changes sequence names if there are conflicted names.
  #
  # CR, LF and NUL in the names are first replaced with spaces.
  # For conflicting names, whitespace inside the first <em>len</em>
  # characters is replaced with '_'; if names still conflict,
  # every name is prefixed with its index ("0_name", "1_name", ...).
  #
  # array:: names of the sequences (array of string)
  # len::   length to check (default:30)
  #
  # Returns a new array of names; <em>array</em> is not modified.
  def avoid_same_name(array, len = 30)
    na = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    if dupidx = have_same_name?(na, len)
      procs = [
        Proc.new { |s, i|
          s[0, len].to_s.gsub(/\s/, '_') + s[len..-1].to_s
        },
        # Proc.new { |s, i|
        #   "#{i}_#{s}"
        # },
      ]
      procs.each do |pr|
        dupidx.each do |i|
          # Work on the sanitized name so that control characters
          # removed above are not reintroduced from the raw input.
          na[i] = pr.call(na[i], i)
        end
        dupidx = have_same_name?(na, len)
        break unless dupidx
      end
      if dupidx then
        # Last resort: make every name unique by prefixing its index.
        na.each_with_index do |s, i|
          na[i] = "#{i}_#{s}"
        end
      end
    end
    na
  end
  private :avoid_same_name

  # Generates ClustalW-formatted text
  # seqs::    sequences (must be an alignment object; seq_length,
  #           gap_regexp, collect and, unless :match_line is given,
  #           match_line are called on it)
  # names::   names of the sequences
  # options:: options (Hash). Keys read by this method:
  #           :replace_space::   replace whitespace in names with '_'
  #           :escape::          replace : ; , ( ) in names with '_'
  #                              (done unless set to false/nil)
  #           :split::           keep only the first word of each name
  #                              (done unless set to false/nil)
  #           :avoid_same_name:: rename conflicting names
  #                              (done unless set to false/nil)
  #           :gap_char::        gap character for output (default '-')
  #           :type::            matched against /protein/i, /aa/i, /na/i
  #                              and passed to match_line as :aa or :na
  #           :match_line::      match line to print instead of the
  #                              one computed by seqs.match_line;
  #                              the given string is not modified
  #           :case::            matched against /lower/i or /upper/i
  #                              to change the case of the sequences
  #           :seqnos::          append residue counts to each line
  #
  # Returns the formatted text as a String.
  def clustalw_formatter(seqs, names, options = {})
    #(original)
    aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
    len = seqs.seq_length
    sn = names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
    if options[:replace_space]
      sn.collect! { |x| x.gsub(/\s/, '_') }
    end
    if !options.has_key?(:escape) || options[:escape]
      sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
    end
    if !options.has_key?(:split) || options[:split]
      sn.collect! { |x| x.split(/\s/)[0].to_s }
    end
    if !options.has_key?(:avoid_same_name) || options[:avoid_same_name]
      sn = avoid_same_name(sn)
    end

    # Long names switch to the wide-name layout with shorter sequence lines.
    if sn.find { |x| x.length > 10 } then
      seqwidth = 50
      namewidth = 30
    else
      seqwidth = 60
      namewidth = 10
    end
    sep = ' ' * 6
    seqregexp = Regexp.new("(.{1,#{seqwidth}})")
    gchar = (options[:gap_char] || '-')

    case options[:type].to_s
    when /protein/i, /aa/i
      mopt = { :type => :aa }
    when /na/i
      mopt = { :type => :na }
    else
      mopt = {}
    end
    # Copy the match line: it is padded and wrapped in place below, and a
    # caller-supplied options[:match_line] must not be altered.
    mline = (options[:match_line] || seqs.match_line(mopt)).to_s.dup

    gap_re = seqs.gap_regexp
    aseqs = seqs.collect do |s|
      s.to_s.gsub(gap_re, gchar)
    end
    case options[:case].to_s
    when /lower/i
      aseqs.each { |s| s.downcase! }
    when /upper/i
      aseqs.each { |s| s.upcase! }
    end

    aseqs << mline
    aseqs.collect! do |s|
      # sn runs out before the match line, which therefore gets a blank name.
      snx = sn.shift
      head = snx.to_s.ljust(namewidth)[0, namewidth] + sep
      pad = len - s.length
      # A line longer than the alignment (possible for a user-supplied
      # match line) must not produce a negative repeat count.
      s << (gchar * pad) if pad > 0
      s.gsub!(seqregexp, "\\1\n")
      a = s.split(/^/)
      if options[:seqnos] && snx then
        # Running count of non-gap residues, printed at each line end.
        i = 0
        a.each do |x|
          x.chomp!
          l = x.tr(gchar, '').length
          i += l
          x.concat(l > 0 ? " #{i}\n" : "\n")
        end
      end
      a.collect { |x| head + x }
    end
    # Interleave: one wrapped line of every sequence per output block.
    lines = (len + seqwidth - 1).div(seqwidth)
    lines.times do
      aln << "\n"
      aseqs.each { |a| aln << a.shift }
    end
    aln.join('')
  end
  private :clustalw_formatter
end #module ClustalWFormatter
1028
+
1029
+
1030
# Bio::Alignment::ArrayExtension is a set of useful methods for
# multiple sequence alignment.
# It is designed to be extended to array objects or
# included in your own classes which inherit Array.
# (It can also be included in Array, though not recommended.)
#
# It possesses all methods defined in EnumerableExtension.
# For usage of methods, please refer to EnumerableExtension.
module ArrayExtension
  include EnumerableExtension
  include ClustalWFormatter

  # Iterates over each sequences.
  # Yields a sequence.
  #
  # It works the same as Array#each.
  def each_seq(&block) #:yields: seq
    each(&block)
  end

  # Returns a string of Clustal W formatted text of the alignment.
  # The index of each sequence (0, 1, 2, ...) is used as its name.
  def to_clustal(options = {})
    indices = (0...(self.size)).to_a
    clustalw_formatter(self, indices, options)
  end
end #module ArrayExtension
1055
+
1056
# Bio::Alignment::HashExtension is a set of useful methods for
# multiple sequence alignment.
# It is designed to be extended to hash objects or
# included in your own classes which inherit Hash.
# (It can also be included in Hash, though not recommended.)
#
# It possesses all methods defined in EnumerableExtension.
# For usage of methods, please refer to EnumerableExtension.
#
# Because alignment_collect is redefined here,
# some methods' return value's class are changed to
# SequenceHash instead of SequenceArray.
#
# Because the order of the objects in a hash is inconstant,
# some methods strictly affected with the order of objects
# might not work correctly,
# e.g. EnumerableExtension#convert_match and #convert_unmatch.
module HashExtension
  include EnumerableExtension
  include ClustalWFormatter

  # Iterates over each sequences.
  # Yields a sequence.
  #
  # It works the same as Hash#each_value.
  def each_seq(&block) #:yields: seq
    each_value(&block)
  end

  # Runs the block for each sequence and stores each result, under the
  # key of the original sequence, in a new
  # Bio::Alignment::SequenceHash object, which is returned.
  # The properties of self (get_all_property) are copied to the result.
  #
  # Note that it would be redefined if you want to change
  # return value's class.
  #
  def alignment_collect
    result = SequenceHash.new
    result.set_all_property(get_all_property)
    each_pair { |key, str| result.store(key, yield(str)) }
    result
  end

  # Concatenates the given alignment.
  # If <em>align</em> is a Hash (or SequenceHash),
  # sequences of same keys are concatenated.
  # Otherwise, <em>align</em> must have <tt>each_seq</tt>
  # or <tt>each</tt> method and
  # works same as EnumerableExtension#alignment_concat.
  #
  # Returns self.
  #
  # Note that it is a destructive method.
  #
  def alignment_concat(align)
    yielded = nil
    # 1st attempt: treat align as hash-like and concatenate by key.
    begin
      align.each_pair do |key, seq|
        yielded = true
        target = self[key]
        target.concat(seq) if target
      end
      return self
    rescue NoMethodError, ArgumentError => err
      # An error raised before anything was yielded is taken to mean
      # that align has no usable each_pair; otherwise it is re-raised.
      raise err if yielded
    end
    # 2nd attempt: concatenate by position using each_seq.
    targets = values
    pos = 0
    begin
      align.each_seq do |seq|
        yielded = true
        targets[pos].concat(seq) if targets[pos] && seq
        pos += 1
      end
      return self
    rescue NoMethodError, ArgumentError => err
      raise err if yielded
    end
    # Last attempt: concatenate by position using each.
    align.each do |seq|
      targets[pos].concat(seq) if targets[pos] && seq
      pos += 1
    end
    self
  end

  # Returns a string of Clustal W formatted text of the alignment.
  # The keys are used as the names of the sequences.
  def to_clustal(options = {})
    names = self.keys
    seqs = SequenceArray.new
    names.each { |k| seqs << self[k] }
    clustalw_formatter(seqs, names, options)
  end
end #module HashExtension
1154
+
1155
+ # Bio::Alignment::SequenceArray is a container class for
1156
+ # a multiple sequence alignment.
1157
+ # Since it inherits from Array, it behaves exactly like an Array.
1158
+ # In addition, the methods defined in ArrayExtension (which itself
1159
+ # includes EnumerableExtension) can be used.
1160
+ class SequenceArray < Array
1161
+ include ArrayExtension
1162
+ end #class SequenceArray
1163
+
1164
+ # Bio::Alignment::SequenceHash is a container class for
1165
+ # a multiple sequence alignment.
1166
+ # Since it inherits from Hash, it behaves exactly like a Hash.
1167
+ # In addition, the methods defined in HashExtension (which itself
1168
+ # includes EnumerableExtension) can be used.
1169
+ class SequenceHash < Hash
1170
+ include HashExtension
1171
+ end #class SequenceHash
1172
+
1173
# Bio::Alignment::OriginalPrivate is a set of private methods
# for Bio::Alignment::OriginalAlignment.
# Both methods are also callable as module functions.
module OriginalPrivate

  # Gets the sequence from given object.
  #
  # A Bio::Sequence is returned as it is. For any other object the
  # methods seq, naseq and aaseq are tried in this order and the first
  # non-nil/non-false result is returned (NameError, which covers a
  # missing method, and ArgumentError count as "no result").
  # If none of them gives a result, the object itself is returned.
  def extract_seq(obj)
    return obj if obj.is_a?(Bio::Sequence)
    [ :seq, :naseq, :aaseq ].each do |meth|
      found = begin
                obj.send(meth)
              rescue NameError, ArgumentError
                nil
              end
      return found if found
    end
    obj
  end
  module_function :extract_seq

  # Gets the name or the definition of the sequence from given object.
  #
  # The methods definition and entry_id are tried in this order and
  # the first non-nil/non-false result is returned (NameError and
  # ArgumentError count as "no result").
  # The last result (nil or false) is returned if neither gives a name.
  def extract_key(obj)
    name = nil
    [ :definition, :entry_id ].each do |meth|
      name = begin
               obj.send(meth)
             rescue NameError, ArgumentError
               nil
             end
      break if name
    end
    name
  end
  module_function :extract_key
end #module OriginalPrivate
1212
+
1213
# Bio::Alignment::OriginalAlignment is
# the BioRuby original multiple sequence alignment container class.
# It includes HashExtension.
#
# Sequences are kept in a Hash (key => sequence) together with
# an Array of keys which records the order of the sequences.
#
# It is recommended only to use methods defined in EnumerableExtension
# (and the each_seq method).
# The methods only defined in this class might be obsoleted in the future.
#
class OriginalAlignment

  include Enumerable
  include HashExtension
  include OriginalPrivate

  # Reads the given files with Bio::FlatFile and creates a new
  # alignment object containing all entries found.
  #
  # It will be obsoleted.
  def self.readfiles(*files)
    require 'bio/io/flatfile'
    aln = self.new
    files.each do |fn|
      Bio::FlatFile.open(nil, fn) do |ff|
        aln.add_sequences(ff)
      end
    end
    aln
  end

  # Creates a new alignment object from given arguments
  # (each argument is one sequence).
  #
  # It will be obsoleted.
  def self.new2(*arg)
    self.new(arg)
  end

  # Creates a new alignment object.
  # <em>seqs</em> may be one of follows:
  # an array of sequences (or strings),
  # an array of sequence database objects,
  # an alignment object.
  def initialize(seqs = [])
    @seqs = {}
    @keys = []
    self.add_sequences(seqs)
  end

  # If <em>x</em> is an object of the same class and holds the same
  # key-to-sequence mapping, returns true.
  # Otherwise, returns false.
  # The order of the sequences is not compared.
  def ==(x)
    #(original)
    if x.is_a?(self.class)
      self.to_hash == x.to_hash
    else
      false
    end
  end

  # Returns the internal Hash (key => sequence).
  # It is not a copy; changes made to it affect the alignment.
  def to_hash
    #(Hash-like)
    @seqs
  end

  # Adds sequences to the alignment.
  # <em>seqs</em> may be one of follows:
  # an array of sequences (or strings),
  # an array of sequence database objects,
  # an alignment object.
  #
  # If a block is given, each element of <em>seqs</em> is yielded and
  # the block must return [ sequence, key ].
  #
  # Returns self.
  def add_sequences(seqs)
    if block_given? then
      seqs.each do |x|
        s, key = yield x
        self.store(key, s)
      end
    else
      if seqs.is_a?(self.class) then
        # Sequences of an alignment are stored as they are.
        seqs.each_pair do |k, s|
          self.store(k, s)
        end
      elsif seqs.respond_to?(:each_pair)
        seqs.each_pair do |k, x|
          s = extract_seq(x)
          self.store(k, s)
        end
      else
        seqs.each do |x|
          s = extract_seq(x)
          k = extract_key(x)
          self.store(k, s)
        end
      end
    end
    self
  end

  # identifiers (or definitions or names) of the sequences
  attr_reader :keys

  # stores a sequences with the name
  # key:: name of the sequence
  # seq:: sequence
  #
  # The key is appended to the key list without checking whether
  # it is already used. Returns seq.
  def __store__(key, seq)
    #(Hash-like)
    h = { key => seq }
    # Take the key back from the Hash so that the key list holds
    # the same object the Hash uses as its key.
    @keys << h.keys[0]
    @seqs.update(h)
    seq
  end

  # stores a sequence with <em>key</em>
  # (name or definition of the sequence).
  # Unlike <tt>__store__</tt> method, the method doesn't allow
  # same keys.
  # If the key is nil (or false) or is already used, the given key is
  # discarded and an unused integer serial number is used as the key
  # instead; the sequence stored under the existing key is preserved.
  # Returns the key under which the sequence was actually stored.
  def store(key, seq)
    #(Hash-like) returns key instead of seq
    if @seqs.has_key?(key) then
      # don't allow same key
      # New key is discarded, while existing key is preserved.
      key = nil
    end
    unless key then
      unless defined?(@serial)
        @serial = 0
      end
      @serial = @seqs.size if @seqs.size > @serial
      while @seqs.has_key?(@serial)
        @serial += 1
      end
      key = @serial
    end
    self.__store__(key, seq)
    key
  end

  # Reconstructs internal data structure.
  # (Like Hash#rehash)
  #
  # The key list is resynchronized with the Hash: keys no longer in
  # the Hash are dropped and keys missing from the list are appended.
  # Returns self.
  def rehash
    @seqs.rehash
    tmpkeys = @seqs.keys
    @keys.collect! do |k|
      # Array#delete returns nil when k is not a key of the Hash.
      tmpkeys.delete(k)
    end
    @keys.compact!
    @keys.concat(tmpkeys)
    self
  end

  # Prepends seq (with key) to the front of the alignment.
  # (Like Array#unshift)
  #
  # Returns the key under which the sequence was stored.
  def unshift(key, seq)
    #(Array-like)
    self.store(key, seq)
    # store appended the key at the tail; move it to the head.
    k = @keys.pop
    @keys.unshift(k)
    k
  end

  # Removes the first sequence in the alignment and
  # returns [ key, seq ].
  # Returns nil if the alignment is empty.
  def shift
    k = @keys.shift
    if k then
      s = @seqs.delete(k)
      [ k, s ]
    else
      nil
    end
  end

  # Gets the <em>n</em>-th sequence.
  # If not found, returns nil.
  def order(n)
    #(original)
    @seqs[@keys[n]]
  end

  # Removes the sequence whose key is <em>key</em>.
  # Returns the removed sequence.
  # If not found, returns nil.
  def delete(key)
    #(Hash-like)
    @keys.delete(key)
    @seqs.delete(key)
  end

  # Returns sequences in the order of the keys. (Like Hash#values)
  def values
    #(Hash-like)
    @keys.collect { |k| @seqs[k] }
  end

  # Adds a sequence without key.
  # The key is automatically determined.
  # Returns self.
  def <<(seq)
    #(Array-like)
    self.store(nil, seq)
    self
  end

  # Gets a sequence. (Like Hash#[])
  def [](*arg)
    #(Hash-like)
    @seqs[*arg]
  end

  # Number of sequences in the alignment.
  def size
    #(Hash&Array-like)
    @seqs.size
  end

  # If the key exists, returns true. Otherwise, returns false.
  # (Like Hash#has_key?)
  def has_key?(key)
    #(Hash-like)
    @seqs.has_key?(key)
  end

  # Iterates over each sequence in the order of the keys.
  # (Like Array#each)
  # A block is required.
  def each
    #(Array-like)
    @keys.each do |k|
      yield @seqs[k]
    end
  end
  alias each_seq each

  # Iterates over each key and sequence in the order of the keys.
  # (Like Hash#each_pair)
  # A block is required.
  def each_pair
    #(Hash-like)
    @keys.each do |k|
      yield k, @seqs[k]
    end
  end

  # Iterates over each sequence, replacing the sequence with the
  # value returned by the block.
  def collect!
    #(Array-like)
    @keys.each do |k|
      @seqs[k] = yield @seqs[k]
    end
  end

  ###--
  ### note that 'collect' and 'to_a' is defined in Enumerable
  ###
  ### instance-variable-related methods
  ###++

  # Creates new alignment of the same class with the same properties
  # (get_all_property) as self. Internal use only.
  def new(*arg)
    na = self.class.new(*arg)
    na.set_all_property(get_all_property)
    na
  end
  protected :new

  # Duplicates the alignment.
  # The sequences themselves are not copied (see isolate).
  def dup
    #(Hash-like)
    self.new(self)
  end

  #--
  # methods below should not access instance variables
  #++

  # Merges given alignment and returns a new alignment.
  def merge(*other)
    #(Hash-like)
    na = self.new(self)
    na.merge!(*other)
    na
  end

  # Merge given alignment.
  # For a key already present, the sequence is replaced in place by the
  # value of the block (called with key, old sequence, new sequence)
  # if a block is given; otherwise the old sequence is removed and the
  # new one is stored at the tail.
  # Returns self.
  #
  # Note that it is destructive method.
  def merge!(*other)
    #(Hash-like)
    if block_given? then
      other.each do |aln|
        aln.each_pair do |k, s|
          if self.has_key?(k) then
            s = yield k, self[k], s
            self.to_hash.store(k, s)
          else
            self.store(k, s)
          end
        end
      end
    else
      other.each do |aln|
        aln.each_pair do |k, s|
          self.delete(k) if self.has_key?(k)
          self.store(k, s)
        end
      end
    end
    self
  end

  # Returns the key for a given sequence. If not found, returns nil.
  #
  # Sequences of the same class are compared with ==,
  # otherwise their to_s values are compared.
  def index(seq)
    #(Hash-like)
    # The key is returned from inside the block: the value of a block
    # parameter after the loop must not be relied on, because it is
    # block-local in newer Ruby (giving nil even on a hit) and holds
    # the last key visited in older Ruby (giving a key even on a miss).
    self.each_pair do |k, s|
      if s.class == seq.class then
        r = (s == seq)
      else
        r = (s.to_s == seq.to_s)
      end
      return k if r
    end
    nil
  end

  # Sequences in the alignment are duplicated
  # (each one is replaced by seqclass.new(sequence)).
  # If keys are given to the argument, sequences of given keys are
  # duplicated.
  # The order of the sequences is kept. Returns self.
  #
  # It will be obsoleted.
  def isolate(*arg)
    #(original)
    if arg.size == 0 then
      self.collect! do |s|
        seqclass.new(s)
      end
    else
      arg.each do |k|
        if self.has_key?(k) then
          # Replace in place so that the sequence keeps its position.
          self.to_hash.store(k, seqclass.new(self[k]))
        end
      end
    end
    self
  end

  # Iterates over each sequence and each results running block
  # are collected and returns a new alignment of the same class,
  # with the same keys and properties.
  #
  # The method name 'collect_align' will be obsoleted.
  # Please use 'alignment_collect' instead.
  def alignment_collect
    #(original)
    na = self.class.new
    na.set_all_property(get_all_property)
    self.each_pair do |k, s|
      na.store(k, yield(s))
    end
    na
  end
  alias collect_align alignment_collect

  # Removes empty sequences or nil in the alignment.
  # (Like Array#compact!)
  # Returns an array of the removed keys, or nil if nothing was removed.
  def compact!
    #(Array-like)
    d = []
    # Collect first, delete afterwards: the alignment must not be
    # modified while it is being iterated.
    self.each_pair do |k, s|
      if !s || s.empty?
        d << k
      end
    end
    d.each do |k|
      self.delete(k)
    end
    d.empty? ? nil : d
  end

  # Removes empty sequences or nil and returns new alignment.
  # (Like Array#compact)
  def compact
    #(Array-like)
    na = self.dup
    na.compact!
    na
  end

  # Adds a sequence to the alignment.
  # If <em>seq</em> is not a Bio::Sequence, the sequence is extracted
  # from it (extract_seq) and, unless <em>key</em> is given, so is
  # the key (extract_key).
  # Returns the key under which the sequence was stored; as with store,
  # this is an automatically assigned serial number when the key is
  # nil or already used.
  #
  # It resembles BioPerl's AlignI::add_seq method.
  def add_seq(seq, key = nil)
    #(BioPerl) AlignI::add_seq like method
    unless seq.is_a?(Bio::Sequence) then
      s = extract_seq(seq)
      key = extract_key(seq) unless key
      seq = s
    end
    self.store(key, seq)
  end

  # Removes given sequence from the alignment.
  # Returns removed sequence. If nothing removed, returns nil.
  #
  # It resembles BioPerl's AlignI::remove_seq.
  def remove_seq(seq)
    #(BioPerl) AlignI::remove_seq like method
    if k = self.index(seq) then
      self.delete(k)
    else
      nil
    end
  end

  # Removes sequences from the alignment by given keys.
  # Returns an alignment object consists of removed sequences.
  #
  # It resembles BioPerl's AlignI::purge method.
  def purge(*arg)
    #(BioPerl) AlignI::purge like method
    purged = self.new
    arg.each do |k|
      if self[k] then
        purged.store(k, self.delete(k))
      end
    end
    purged
  end

  # If block is given, it acts like Array#select (Enumerable#select).
  # Returns a new alignment containing all sequences of the alignment
  # for which return value of given block is not false nor nil.
  #
  # If no block is given, it acts like the BioPerl's AlignI::select.
  # Returns a new alignment containing sequences of given keys.
  #
  # The BioPerl's AlignI::select-like action will be obsoleted.
  def select(*arg)
    #(original)
    na = self.new
    if block_given? then
      # 'arg' is ignored
      # nearly same action as Array#select (Enumerable#select)
      # each_pair needs its block directly: it yields unconditionally
      # and so cannot be called without one.
      self.each_pair do |k, s|
        na.store(k, s) if yield(s)
      end
    else
      # BioPerl's AlignI::select like function
      arg.each do |k|
        if s = self[k] then
          na.store(k, s)
        end
      end
    end
    na
  end

  # The method name <tt>slice</tt> will be obsoleted.
  # Please use <tt>alignment_slice</tt> instead.
  alias slice alignment_slice

  # The method name <tt>subseq</tt> will be obsoleted.
  # Please use <tt>alignment_subseq</tt> instead.
  alias subseq alignment_subseq

  # Not-destructive version of alignment_normalize!.
  # Returns a new alignment.
  def normalize
    #(original)
    na = self.dup
    na.alignment_normalize!
    na
  end

  # Not-destructive version of alignment_rstrip!.
  # Returns a new alignment.
  def rstrip
    #(String-like)
    na = self.dup
    # Copy the sequences so that the originals are not modified.
    na.isolate
    na.alignment_rstrip!
    na
  end

  # Not-destructive version of alignment_lstrip!.
  # Returns a new alignment.
  def lstrip
    #(String-like)
    na = self.dup
    na.isolate
    na.alignment_lstrip!
    na
  end

  # Not-destructive version of alignment_strip!.
  # Returns a new alignment.
  def strip
    #(String-like)
    na = self.dup
    na.isolate
    na.alignment_strip!
    na
  end

  # Not-destructive version of remove_all_gaps!.
  # Returns a new alignment.
  #
  # The method name 'remove_gap' will be obsoleted.
  # Please use 'remove_all_gaps' instead.
  def remove_all_gaps
    #(original)
    na = self.dup
    na.isolate
    na.remove_all_gaps!
    na
  end

  # Concatenates a string or an alignment.
  # A string (anything responding to to_str) is appended to
  # every sequence.
  # Returns self.
  #
  # Note that the method will be obsoleted.
  # Please use <tt>each_seq { |s| s << str }</tt> for concatenating
  # a string and
  # <tt>alignment_concat(aln)</tt> for concatenating an alignment.
  def concat(aln)
    #(String-like)
    if aln.respond_to?(:to_str) then #aln.is_a?(String)
      self.each do |s|
        s << aln
      end
      self
    else
      alignment_concat(aln)
    end
  end

  # Replace the specified region of the alignment to aln.
  # aln:: String or Bio::Alignment object
  # arg:: same format as String#slice
  #
  # A string replaces the region in every sequence; an alignment of
  # the same class replaces it by key; any other object is iterated
  # with each and replaces it by position.
  # Returns self.
  #
  # It will be obsoleted.
  def replace_slice(aln, *arg)
    #(original)
    if aln.respond_to?(:to_str) then #aln.is_a?(String)
      self.each do |s|
        s[*arg] = aln
      end
    elsif aln.is_a?(self.class) then
      aln.each_pair do |k, s|
        self[k][*arg] = s
      end
    else
      i = 0
      aln.each do |s|
        self.order(i)[*arg] = s
        i += 1
      end
    end
    self
  end

  # Performs multiple alignment by using external program.
  # <em>factory</em> must respond to query(alignment) and return an
  # object whose alignment method gives the result, keyed by the
  # string form of the sequence positions ("0", "1", ...).
  # Returns a new alignment with the original keys.
  # Raises RuntimeError if the result lacks any of the sequences.
  def do_align(factory)
    # Re-key the sequences by position so that the result can be
    # mapped back regardless of what the original keys look like.
    a0 = self.class.new
    (0...self.size).each { |i| a0.store(i, self.order(i)) }
    r = factory.query(a0)
    a1 = r.alignment
    a0.keys.each do |k|
      unless a1[k.to_s] then
        raise 'alignment result is inconsistent with input data'
      end
    end
    a2 = self.new
    a0.keys.each do |k|
      a2.store(self.keys[k], a1[k.to_s])
    end
    a2
  end

  # Convert to fasta format and returns an array of strings.
  #
  # Arguments: an optional Integer line width, followed by
  # an optional Hash of options (:width, :avoid_same_name).
  #
  # It will be obsoleted.
  def to_fasta_array(*arg)
    #(original)
    width = nil
    if arg[0].is_a?(Integer) then
      width = arg.shift
    end
    options = (arg.shift || {})
    width = options[:width] unless width
    if options[:avoid_same_name] then
      na = avoid_same_name(self.keys, 30)
    else
      na = self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    end
    # Build the line-wrapping pattern once, not once per sequence.
    wrap = (width ? Regexp.new(".{1,#{width}}") : nil)
    a = self.collect do |s|
      ">#{na.shift}\n" +
        if wrap then
          s.to_s.gsub(wrap, "\\0\n")
        else
          s.to_s + "\n"
        end
    end
    a
  end

  # Converts to fasta format and returns an array of FastaFormat objects.
  #
  # It will be obsoleted.
  def to_fastaformat_array(*arg)
    #(original)
    require 'bio/db/fasta'
    a = self.to_fasta_array(*arg)
    a.collect! do |x|
      Bio::FastaFormat.new(x)
    end
    a
  end

  # Converts to fasta format and returns a string.
  #
  # The specification of the argument will be changed.
  def to_fasta(*arg)
    #(original)
    self.to_fasta_array(*arg).join('')
  end

  include ClustalWFormatter
  # Returns a string of Clustal W formatted text of the alignment.
  # The keys are used as the names of the sequences.
  def to_clustal(options = {})
    clustalw_formatter(self, self.keys, options)
  end

  # The method name <tt>consensus</tt> will be obsoleted.
  # Please use <tt>consensus_string</tt> instead.
  alias consensus consensus_string
end #class OriginalAlignment
1849
+
1850
# Bio::Alignment::GAP is a set of class methods for
# gap-related position translation.
# Positions are 0-based.
module GAP
  # position with gaps are translated into the position without gaps.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position with gaps
  #<em>gap_regexp</em>:: regular expression to specify gaps
  #
  # Returns 0 when there is no non-gap character up to <em>pos</em>.
  def ungapped_pos(seq, pos, gap_regexp)
    residues = seq[0..pos].gsub(gap_regexp, '').length
    residues > 0 ? residues - 1 : residues
  end
  module_function :ungapped_pos

  # position without gaps are translated into the position with gaps.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position without gaps
  #               (negative values count from the end)
  #<em>gap_regexp</em>:: regular expression to specify gaps
  def gapped_pos(seq, pos, gap_regexp)
    total = seq.gsub(gap_regexp, '').length
    pos = total if pos >= total
    pos += total if pos < 0

    cursor = 0
    remaining = pos + 1
    # Advance by the number of residues still needed; every gap found
    # in the chunk just read leaves that many residues still to find.
    until remaining <= 0 || cursor >= seq.length
      chunk = seq[cursor, remaining]
      cursor += remaining
      remaining -= chunk.gsub(gap_regexp, '').length
    end
    cursor > 0 ? cursor - 1 : cursor
  end
  module_function :gapped_pos
end # module GAP
1885
+
1886
class << self
  # Creates a new Bio::Alignment::OriginalAlignment object.
  # All arguments are passed to OriginalAlignment.new;
  # please refer to its documentation.
  def new(*arg)
    OriginalAlignment.new(*arg)
  end

  # Creates a new Bio::Alignment::OriginalAlignment object.
  # All arguments are passed to OriginalAlignment.new2;
  # please refer to its documentation.
  def new2(*arg)
    OriginalAlignment.new2(*arg)
  end

  # Creates a new Bio::Alignment::OriginalAlignment object.
  # All arguments are passed to OriginalAlignment.readfiles;
  # please refer to its documentation.
  def readfiles(*files)
    OriginalAlignment.readfiles(*files)
  end
end
1903
+ end #module Alignment
1904
+
1905
+ end #module Bio
1906
+