bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,1906 @@
1
+ #
2
+ # = bio/alignment.rb - multiple alignment of sequences
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2005
5
+ # GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
6
+ #
7
+ # License:: LGPL
8
+ #
9
+ # $Id: alignment.rb,v 1.14 2005/12/02 12:01:28 ngoto Exp $
10
+ #
11
+ #--
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #++
26
+ #
27
+ # = About Bio::Alignment
28
+ #
29
+ # Please refer to the documentation of the Bio::Alignment module.
30
+ #
31
+ # = References
32
+ #
33
+ # * Bio::Align::AlignI class of the BioPerl.
34
+ # http://doc.bioperl.org/releases/bioperl-1.4/Bio/Align/AlignI.html
35
+ #
36
+ # * Bio::SimpleAlign class of the BioPerl.
37
+ # http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
38
+ #
39
+
40
+ require 'bio/sequence'
41
+
42
+ module Bio
43
+
44
+ =begin rdoc
45
+
46
+ = About Bio::Alignment
47
+
48
+ Bio::Alignment is a namespace of classes/modules for multiple sequence
49
+ alignment.
50
+
51
+ = Multiple alignment container classes
52
+
53
+ == Bio::Alignment::OriginalAlignment
54
+
55
+ == Bio::Alignment::SequenceArray
56
+
57
+ == Bio::Alignment::SequenceHash
58
+
59
+ = Bio::Alignment::Site
60
+
61
+ = Modules
62
+
63
+ == Bio::Alignment::EnumerableExtension
64
+
65
+ Mix-in for classes that include Enumerable.
66
+
67
+ == Bio::Alignment::ArrayExtension
68
+
69
+ Mix-in for Array or Array-like classes.
70
+
71
+ == Bio::Alignment::HashExtension
72
+
73
+ Mix-in for Hash or Hash-like classes.
74
+
75
+ == Bio::Alignment::SiteMethods
76
+
77
+ == Bio::Alignment::PropertyMethods
78
+
79
+ = Bio::Alignment::GAP
80
+
81
+ = Compatibility from older BioRuby
82
+
83
+ =end
84
+ module Alignment
85
+
86
+ # Bio::Alignment::PropertyMethods is a set of methods to treat
87
+ # the gap character and so on.
88
+ module PropertyMethods
89
+ # regular expression for detecting gaps.
90
+ GAP_REGEXP = /[^a-zA-Z]/
91
+ # gap character
92
+ GAP_CHAR = '-'.freeze
93
+ # missing character
94
+ MISSING_CHAR = '?'.freeze
95
+
96
+ # If given character is a gap, returns true.
97
+ # Otherwise, return false.
98
+ # Note that <em>s</em> must be a String which contains a single character.
99
+ def is_gap?(s)
100
+ (gap_regexp =~ s) ? true : false
101
+ end
102
+
103
+ # Returns regular expression for checking gap.
104
+ def gap_regexp
105
+ @gap_regexp or GAP_REGEXP
106
+ end
107
+ # regular expression for checking gap
108
+ attr_writer :gap_regexp
109
+
110
+ # Gap character.
111
+ def gap_char
112
+ @gap_char or GAP_CHAR
113
+ end
114
+ # gap character
115
+ attr_writer :gap_char
116
+
117
+ # Character if the site is missing or unknown.
118
+ def missing_char
119
+ @missing_char or MISSING_CHAR
120
+ end
121
+ # Character if the site is missing or unknown.
122
+ attr_writer :missing_char
123
+
124
+ # Returns class of the sequence.
125
+ # If instance variable @seqclass (which can be
126
+ # set by 'seqclass=' method) is set, simply returns the value.
127
+ # Otherwise, returns String.
128
+ # (The sequences themselves are not inspected by this method.)
129
+ def seqclass
130
+ @seqclass or String
131
+ end
132
+
133
+ # The class of the sequence.
134
+ # The value must be String or its derivatives.
135
+ attr_writer :seqclass
136
+
137
+ # Returns properties defined in the object as a hash.
138
+ def get_all_property
139
+ ret = {}
140
+ if defined? @gap_regexp
141
+ ret[:gap_regexp] = @gap_regexp
142
+ end
143
+ if defined? @gap_char
144
+ ret[:gap_char] = @gap_char
145
+ end
146
+ if defined? @missing_char
147
+ ret[:missing_char] = @missing_char
148
+ end
149
+ if defined? @seqclass
150
+ ret[:seqclass] = @seqclass
151
+ end
152
+ ret
153
+ end
154
+
155
+ # Sets properties from given hash.
156
+ # <em>hash</em> would be a return value of the <tt>get_all_property</tt> method.
157
+ def set_all_property(hash)
158
+ @gap_regexp = hash[:gap_regexp] if hash.has_key?(:gap_regexp)
159
+ @gap_char = hash[:gap_char] if hash.has_key?(:gap_char)
160
+ @missing_char = hash[:missing_char] if hash.has_key?(:missing_char)
161
+ @seqclass = hash[:seqclass] if hash.has_key?(:seqclass)
162
+ self
163
+ end
164
+ end #module PropertyMethods
165
+
166
+ # Bio::Alignment::SiteMethods is a set of methods for
167
+ # Bio::Alignment::Site.
168
+ # It can also be used for extending an array of single-letter strings.
169
+ module SiteMethods
170
+ include PropertyMethods
171
+
172
+ # If there are gaps, returns true. Otherwise, returns false.
173
+ def has_gap?
174
+ (find { |x| is_gap?(x) }) ? true : false
175
+ end
176
+
177
+ # Removes gaps in the site. (destructive method)
178
+ def remove_gaps!
179
+ flag = nil
180
+ self.collect! do |x|
181
+ if is_gap?(x) then flag = self; nil; else x; end
182
+ end
183
+ self.compact!
184
+ flag
185
+ end
186
+
187
+ # Returns consensus character of the site.
188
+ # If consensus is found, returns a single-letter string.
189
+ # If not, returns nil.
190
+ def consensus_string(threshold = 1.0)
191
+ return nil if self.size <= 0
192
+ return self[0] if self.sort.uniq.size == 1
193
+ h = Hash.new(0)
194
+ self.each { |x| h[x] += 1 }
195
+ total = self.size
196
+ b = h.to_a.sort do |x,y|
197
+ z = (y[1] <=> x[1])
198
+ z = (self.index(x[0]) <=> self.index(y[0])) if z == 0
199
+ z
200
+ end
201
+ if total * threshold <= b[0][1] then
202
+ b[0][0]
203
+ else
204
+ nil
205
+ end
206
+ end
207
+
208
+ # IUPAC nucleotide groups. Internal use only.
209
+ IUPAC_NUC = [
210
+ %w( t u ),
211
+ %w( m a c ),
212
+ %w( r a g ),
213
+ %w( w a t u ),
214
+ %w( s c g ),
215
+ %w( y c t u ),
216
+ %w( k g t u ),
217
+ %w( v a c g m r s ),
218
+ %w( h a c t u m w y ),
219
+ %w( d a g t u r w k ),
220
+ %w( b c g t u s y k ),
221
+ %w( n a c g t u m r w s y k v h d b )
222
+ ]
223
+
224
+ # Returns an IUPAC consensus base for the site.
225
+ # If consensus is found, returns a single-letter string.
226
+ # If not, returns nil.
227
+ def consensus_iupac
228
+ a = self.collect { |x| x.downcase }.sort.uniq
229
+ if a.size == 1 then
230
+ case a[0]
231
+ when 'a', 'c', 'g', 't'
232
+ a[0]
233
+ when 'u'
234
+ 't'
235
+ else
236
+ IUPAC_NUC.find { |x| a[0] == x[0] } ? a[0] : nil
237
+ end
238
+ elsif r = IUPAC_NUC.find { |x| (a - x).size <= 0 } then
239
+ r[0]
240
+ else
241
+ nil
242
+ end
243
+ end
244
+
245
+ # Table of strongly conserved amino-acid groups.
246
+ #
247
+ # The value of the tables are taken from BioPerl
248
+ # (Bio/SimpleAlign.pm in BioPerl 1.0),
249
+ # and the BioPerl documentation says that
249
+ # it is taken from the ClustalW documentation, which states:
251
+ # These are all the positively scoring groups that occur in the
252
+ # Gonnet Pam250 matrix. The strong and weak groups are
253
+ # defined as strong score >0.5 and weak score =<0.5 respectively.
254
+ #
255
+ StrongConservationGroups = %w(STA NEQK NHQK NDEQ QHRK MILV MILF
256
+ HY FYW).collect { |x| x.split('').sort }
257
+
258
+ # Table of weakly conserved amino-acid groups.
259
+ #
260
+ # Please refer to the StrongConservationGroups documentation
261
+ # for the origin of the table.
262
+ WeakConservationGroups = %w(CSA ATV SAG STNK STPA SGND SNDEQK
263
+ NDEQHK NEQHRK FVLIM HFY).collect { |x| x.split('').sort }
264
+
265
+ # Returns the match-line character for the site.
266
+ # This is amino-acid version.
267
+ def match_line_amino(opt = {})
268
+ # opt[:match_line_char] ==> 100% equal default: '*'
269
+ # opt[:strong_match_char] ==> strong match default: ':'
270
+ # opt[:weak_match_char] ==> weak match default: '.'
271
+ # opt[:mismatch_char] ==> mismatch default: ' '
272
+ mlc = (opt[:match_line_char] or '*')
273
+ smc = (opt[:strong_match_char] or ':')
274
+ wmc = (opt[:weak_match_char] or '.')
275
+ mmc = (opt[:mismatch_char] or ' ')
276
+ a = self.collect { |c| c.upcase }.sort.uniq
277
+ a.extend(SiteMethods)
278
+ if a.has_gap? then
279
+ mmc
280
+ elsif a.size == 1 then
281
+ mlc
282
+ elsif StrongConservationGroups.find { |x| (a - x).empty? } then
283
+ smc
284
+ elsif WeakConservationGroups.find { |x| (a - x).empty? } then
285
+ wmc
286
+ else
287
+ mmc
288
+ end
289
+ end
290
+
291
+ # Returns the match-line character for the site.
292
+ # This is nucleic-acid version.
293
+ def match_line_nuc(opt = {})
294
+ # opt[:match_line_char] ==> 100% equal default: '*'
295
+ # opt[:mismatch_char] ==> mismatch default: ' '
296
+ mlc = (opt[:match_line_char] or '*')
297
+ mmc = (opt[:mismatch_char] or ' ')
298
+ a = self.collect { |c| c.upcase }.sort.uniq
299
+ a.extend(SiteMethods)
300
+ if a.has_gap? then
301
+ mmc
302
+ elsif a.size == 1 then
303
+ mlc
304
+ else
305
+ mmc
306
+ end
307
+ end
308
+ end #module SiteMethods
309
+
310
+ # Bio::Alignment::Site stores bases or amino-acids in a
311
+ # site of the alignment.
312
+ # It would store multiple String objects of length 1.
313
+ # Please refer to the document of Array and SiteMethods for methods.
314
+ class Site < Array
315
+ include SiteMethods
316
+ end #module Site
317
+
318
+ # The module Bio::Alignment::EnumerableExtension is a set of useful
319
+ # methods for multiple sequence alignment.
320
+ # It can be included by any classes or can be extended to any objects.
321
+ # The classes or objects must have methods defined in Enumerable,
322
+ # and must have the <tt>each</tt> method
323
+ # which iterates over each sequence (or string) and yields
324
+ # a sequence (or string) object.
325
+ #
326
+ # Optionally, if <tt>each_seq</tt> method is defined,
327
+ # which iterates over each sequence (or string) and yields
328
+ # each sequence (or string) object, it is used instead of <tt>each</tt>.
329
+ #
330
+ # Note that the <tt>each</tt> or <tt>each_seq</tt> method would be
331
+ # called multiple times.
332
+ # This means that the module is not suitable for IO objects.
333
+ # In addition, <tt>break</tt> would be used in the given block and
334
+ # destructive methods would be used to the sequences.
335
+ #
336
+ # For Array or Hash objects, you had better use the
337
+ # ArrayExtension or HashExtension modules, respectively.
338
+ # They would have built-in <tt>each_seq</tt> method and/or
339
+ # some methods would be redefined.
340
+ #
341
+ module EnumerableExtension
342
+ include PropertyMethods
343
+
344
+ # Iterates over each sequence.
345
+ # Yields a sequence.
346
+ # It acts the same as Enumerable#each.
347
+ #
348
+ # You would redefine the method suitable for the class/object.
349
+ def each_seq(&block) #:yields: seq
350
+ each(&block)
351
+ end
352
+
353
+ # Returns class of the sequence.
354
+ # If instance variable @seqclass (which can be
355
+ # set by 'seqclass=' method) is set, simply returns the value.
356
+ # Otherwise, returns the first sequence's class.
357
+ # If no sequences are found, returns String.
358
+ def seqclass
359
+ if @seqclass then
360
+ @seqclass
361
+ else
362
+ klass = nil
363
+ each_seq do |s|
364
+ if s then
365
+ klass = s.class
366
+ break if klass
367
+ end
368
+ end
369
+ (klass or String)
370
+ end
371
+ end
372
+
373
+ # Returns the alignment length.
374
+ # Returns the longest length of the sequence in the alignment.
375
+ def alignment_length
376
+ maxlen = 0
377
+ each_seq do |s|
378
+ x = s.length
379
+ maxlen = x if x > maxlen
380
+ end
381
+ maxlen
382
+ end
383
+ alias seq_length alignment_length
384
+
385
+ # Gets a site of the position.
386
+ # Returns a Bio::Alignment::Site object.
387
+ #
388
+ # If the position is out of range, it returns the site
389
+ # of which all are gaps.
390
+ #
391
+ # It is a private method.
392
+ # Only difference from public alignment_site method is
393
+ # it does not do <tt>set_all_property(get_all_property)</tt>.
394
+ def _alignment_site(position)
395
+ site = Site.new
396
+ each_seq do |s|
397
+ c = s[position, 1]
398
+ if c.to_s.empty?
399
+ c = seqclass.new(gap_char)
400
+ end
401
+ site << c
402
+ end
403
+ site
404
+ end
405
+ private :_alignment_site
406
+
407
+ # Gets a site of the position.
408
+ # Returns a Bio::Alignment::Site object.
409
+ #
410
+ # If the position is out of range, it returns the site
411
+ # of which all are gaps.
412
+ def alignment_site(position)
413
+ site = _alignment_site(position)
414
+ site.set_all_property(get_all_property)
415
+ site
416
+ end
417
+
418
+ # Iterates over each site of the alignment.
419
+ # It yields a Bio::Alignment::Site object (which inherits Array).
420
+ # It returns self.
421
+ def each_site
422
+ cp = get_all_property
423
+ (0...alignment_length).each do |i|
424
+ site = _alignment_site(i)
425
+ site.set_all_property(cp)
426
+ yield(site)
427
+ end
428
+ self
429
+ end
430
+
431
+ # Iterates over each site of the alignment, with specifying
432
+ # start, stop positions and step.
433
+ # It yields Bio::Alignment::Site object (which inherits Array).
434
+ # It returns self.
435
+ # It is same as
436
+ # <tt>start.step(stop, step) { |i| yield alignment_site(i) }</tt>.
437
+ def each_site_step(start, stop, step = 1)
438
+ cp = get_all_property
439
+ start.step(stop, step) do |i|
440
+ site = _alignment_site(i)
441
+ site.set_all_property(cp)
442
+ yield(site)
443
+ end
444
+ self
445
+ end
446
+
447
+ # Iterates over each sequence, collects the results of running the block,
447
+ # and returns them as a new alignment in a
448
+ # Bio::Alignment::SequenceArray object.
450
+ #
451
+ # Note that it would be redefined if you want to change
452
+ # return value's class.
453
+ #
454
+ def alignment_collect
455
+ a = SequenceArray.new
456
+ a.set_all_property(get_all_property)
457
+ each_seq do |str|
458
+ a << yield(str)
459
+ end
460
+ a
461
+ end
462
+
463
+ # Returns specified range of the alignment.
464
+ # For each sequence, the '[]' method (it may be String#[])
465
+ # is executed, and returns a new alignment
466
+ # as a Bio::Alignment::SequenceArray object.
467
+ #
468
+ # Unlike the alignment_slice method, the resulting alignment is
468
+ # guaranteed to contain sequence objects (never nil), even if the
469
+ # specified range is out of range.
471
+ #
472
+ # If you want to change return value's class, you should redefine
473
+ # alignment_collect method.
474
+ #
475
+ def alignment_window(*arg)
476
+ alignment_collect do |s|
477
+ s[*arg] or seqclass.new('')
478
+ end
479
+ end
480
+ alias window alignment_window
481
+
482
+ # Iterates over each sliding window of the alignment.
483
+ # window_size is the size of sliding window.
484
+ # step is the step of each sliding.
485
+ # It yields a Bio::Alignment::SequenceArray object which contains
486
+ # each sliding window.
487
+ # It returns a Bio::Alignment::SequenceArray object which contains
488
+ # remainder alignment at the terminal end.
489
+ # If window_size is smaller than 0, it returns nil.
490
+ def each_window(window_size, step_size = 1)
491
+ return nil if window_size < 0
492
+ if step_size >= 0 then
493
+ i = nil
494
+ 0.step(alignment_length - window_size, step_size) do |i|
495
+ yield alignment_window(i, window_size)
496
+ end
497
+ alignment_window((i+window_size)..-1)
498
+ else
499
+ i = alignment_length - window_size
500
+ while i >= 0
501
+ yield alignment_window(i, window_size)
502
+ i += step_size
503
+ end
504
+ alignment_window(0...(i-step_size))
505
+ end
506
+ end
507
+
508
+ # Iterates over each site of the alignment, collects the results of
508
+ # running the block, and returns them as an array.
510
+ # It yields a Bio::Alignment::Site object.
511
+ def collect_each_site
512
+ ary = []
513
+ each_site do |site|
514
+ ary << yield(site)
515
+ end
516
+ ary
517
+ end
518
+
519
+ # Helper method for calculating consensus sequence.
520
+ # It iterates over each site of the alignment.
521
+ # In each site, gaps will be removed if specified with opt.
522
+ # It yields a Bio::Alignment::Site object.
523
+ # Results running the block (String objects are expected)
524
+ # are joined to a string and it returns the string.
525
+ #
526
+ # opt[:gap_mode] ==> 0 -- gaps are regarded as normal characters
527
+ # 1 -- a site containing any gap is regarded as a gap
528
+ # -1 -- gaps are eliminated from consensus calculation
529
+ # default: 0
530
+ #
531
+ def consensus_each_site(opt = {})
532
+ mchar = (opt[:missing_char] or self.missing_char)
533
+ gap_mode = opt[:gap_mode]
534
+ case gap_mode
535
+ when 0, nil
536
+ collect_each_site do |a|
537
+ yield(a) or mchar
538
+ end.join('')
539
+ when 1
540
+ collect_each_site do |a|
541
+ a.has_gap? ? gap_char : (yield(a) or mchar)
542
+ end.join('')
543
+ when -1
544
+ collect_each_site do |a|
545
+ a.remove_gaps!
546
+ a.empty? ? gap_char : (yield(a) or mchar)
547
+ end.join('')
548
+ else
549
+ raise ':gap_mode must be 0, 1 or -1'
550
+ end
551
+ end
552
+
553
+ # Returns the consensus string of the alignment.
554
+ # 0.0 <= threshold <= 1.0 is expected.
555
+ #
556
+ # It resembles the BioPerl's AlignI::consensus_string method.
557
+ #
558
+ # Please refer to the consensus_each_site method for opt.
559
+ #
560
+ def consensus_string(threshold = 1.0, opt = {})
561
+ consensus_each_site(opt) do |a|
562
+ a.consensus_string(threshold)
563
+ end
564
+ end
565
+
566
+ # Returns the IUPAC consensus string of the alignment
567
+ # of nucleic-acid sequences.
568
+ #
569
+ # It resembles the BioPerl's AlignI::consensus_iupac method.
570
+ #
571
+ # Please refer to the consensus_each_site method for opt.
572
+ #
573
+ def consensus_iupac(opt = {})
574
+ consensus_each_site(opt) do |a|
575
+ a.consensus_iupac
576
+ end
577
+ end
578
+
579
+ # Returns the match line string of the alignment
580
+ # of amino-acid sequences.
581
+ #
582
+ # It resembles the BioPerl's AlignI::match_line method.
583
+ #
584
+ # opt[:match_line_char] ==> 100% equal default: '*'
585
+ # opt[:strong_match_char] ==> strong match default: ':'
586
+ # opt[:weak_match_char] ==> weak match default: '.'
587
+ # opt[:mismatch_char] ==> mismatch default: ' '
588
+ #
589
+ # More opt can be accepted.
590
+ # Please refer to the consensus_each_site method for opt.
591
+ #
592
+ def match_line_amino(opt = {})
593
+ collect_each_site do |a|
594
+ a.match_line_amino(opt)
595
+ end.join('')
596
+ end
597
+
598
+ # Returns the match line string of the alignment
599
+ # of nucleic-acid sequences.
600
+ #
601
+ # It resembles the BioPerl's AlignI::match_line method.
602
+ #
603
+ # opt[:match_line_char] ==> 100% equal default: '*'
604
+ # opt[:mismatch_char] ==> mismatch default: ' '
605
+ #
606
+ # More opt can be accepted.
607
+ # Please refer to the consensus_each_site method for opt.
608
+ #
609
+ def match_line_nuc(opt = {})
610
+ collect_each_site do |a|
611
+ a.match_line_nuc(opt)
612
+ end.join('')
613
+ end
614
+
615
+ # Returns the match line string of the alignment
616
+ # of nucleic- or amino-acid sequences.
617
+ # The type of the sequence is automatically determined
618
+ # or you can specify with opt[:type].
619
+ #
620
+ # It resembles the BioPerl's AlignI::match_line method.
621
+ #
622
+ # opt[:type] ==> :na or :aa (or determined by sequence class)
623
+ # opt[:match_line_char] ==> 100% equal default: '*'
624
+ # opt[:strong_match_char] ==> strong match default: ':'
625
+ # opt[:weak_match_char] ==> weak match default: '.'
626
+ # opt[:mismatch_char] ==> mismatch default: ' '
627
+ # :strong_ and :weak_match_char are used only in amino mode (:aa)
628
+ #
629
+ # More opt can be accepted.
630
+ # Please refer to the consensus_each_site method for opt.
631
+ #
632
+ def match_line(opt = {})
633
+ case opt[:type]
634
+ when :aa
635
+ amino = true
636
+ when :na, :dna, :rna
637
+ amino = false
638
+ else
639
+ if seqclass == Bio::Sequence::AA then
640
+ amino = true
641
+ elsif seqclass == Bio::Sequence::NA then
642
+ amino = false
643
+ elsif self.find { |x| /[EFILPQ]/i =~ x } then
644
+ amino = true
645
+ else
646
+ amino = nil
647
+ end
648
+ end
649
+ if amino then
650
+ match_line_amino(opt)
651
+ else
652
+ match_line_nuc(opt)
653
+ end
654
+ end
655
+
656
+ # This is the BioPerl's AlignI::match like method.
657
+ #
658
+ # Changes second to last sequences' sites to match_char(default: '.')
659
+ # when a site is equal to the first sequence's corresponding site.
660
+ #
661
+ # Note that it is a destructive method.
662
+ #
663
+ # For Hash, please use it carefully because
664
+ # the order of the sequences is inconstant.
665
+ #
666
+ def convert_match(match_char = '.')
667
+ #(BioPerl) AlignI::match like method
668
+ len = alignment_length
669
+ firstseq = nil
670
+ each_seq do |s|
671
+ unless firstseq then
672
+ firstseq = s
673
+ else
674
+ (0...len).each do |i|
675
+ if s[i] and firstseq[i] == s[i] and !is_gap?(firstseq[i..i])
676
+ s[i..i] = match_char
677
+ end
678
+ end
679
+ end
680
+ end
681
+ self
682
+ end
683
+
684
+ # This is the BioPerl's AlignI::unmatch like method.
685
+ #
686
+ # Changes sites equal to match_char (default: '.') in the second to last
686
+ # sequences back to the first sequence's characters at those sites.
688
+ #
689
+ # Note that it is a destructive method.
690
+ #
691
+ # For Hash, please use it carefully because
692
+ # the order of the sequences is inconstant.
693
+ #
694
+ def convert_unmatch(match_char = '.')
695
+ #(BioPerl) AlignI::unmatch like method
696
+ len = alignment_length
697
+ firstseq = nil
698
+ each_seq do |s|
699
+ unless firstseq then
700
+ firstseq = s
701
+ else
702
+ (0...len).each do |i|
703
+ if s[i..i] == match_char then
704
+ s[i..i] = (firstseq[i..i] or match_char)
705
+ end
706
+ end
707
+ end
708
+ end
709
+ self
710
+ end
711
+
712
+ # Fills gaps to the tail of each sequence if the length of
713
+ # the sequence is shorter than the alignment length.
714
+ #
715
+ # Note that it is a destructive method.
716
+ def alignment_normalize!
717
+ #(original)
718
+ len = alignment_length
719
+ each_seq do |s|
720
+ s << (gap_char * (len - s.length)) if s.length < len
721
+ end
722
+ self
723
+ end
724
+ alias normalize! alignment_normalize!
725
+
726
+ # Removes excess gaps in the tail of the sequences.
727
+ # If nothing is removed, returns nil.
728
+ # Otherwise, returns self.
729
+ #
730
+ # Note that it is a destructive method.
731
def alignment_rstrip!
  #(String-like)
  full = alignment_length
  keep = full
  # Walk the sites from the last column towards the first and stop
  # at the first column that still has something after gap removal.
  each_site_step(full - 1, 0, -1) do |site|
    site.remove_gaps!
    break unless site.empty?
    keep -= 1
  end
  return nil unless keep < full
  each_seq do |seq|
    seq[keep..-1] = '' if seq.length > keep
  end
  self
end
749
+ alias rstrip! alignment_rstrip!
750
+
751
+ # Removes excess gaps in the head of the sequences.
752
+ # If removes nothing, returns nil.
753
+ # Otherwise, returns self.
754
+ #
755
+ # Note that it is a destructive method.
756
def alignment_lstrip!
  #(String-like)
  lead = 0
  # Count leading columns that are empty once gaps are removed.
  each_site do |site|
    site.remove_gaps!
    break unless site.empty?
    lead += 1
  end
  return nil unless lead > 0
  each_seq do |seq|
    seq[0, lead] = ''
  end
  self
end
771
+ alias lstrip! alignment_lstrip!
772
+
773
+ # Removes excess gaps in the sequences.
774
+ # If removes nothing, returns nil.
775
+ # Otherwise, returns self.
776
+ #
777
+ # Note that it is a destructive method.
778
def alignment_strip!
  #(String-like)
  # Both sides are always stripped; the tail goes first.
  tail = alignment_rstrip!
  head = alignment_lstrip!
  tail || head
end
784
+ alias strip! alignment_strip!
785
+
786
+ # Completely removes ALL gaps in the sequences.
787
+ # If removes nothing, returns nil.
788
+ # Otherwise, returns self.
789
+ #
790
+ # Note that it is a destructive method.
791
def remove_all_gaps!
  changed = false
  each_seq do |seq|
    # gsub! returns nil when the sequence had no gap to remove
    changed = true if seq.gsub!(gap_regexp, '')
  end
  changed ? self : nil
end
799
+
800
+ # Returns the specified range of the alignment.
801
+ # For each sequence, the 'slice' method (it may be String#slice,
802
+ # which is the same as String#[]) is executed, and
803
+ # returns a new alignment as a Bio::Alignment::SequenceArray object.
804
+ #
805
+ # Unlike alignment_window method, the result alignment
806
+ # might contain nil.
807
+ #
808
+ # If you want to change return value's class, you should redefine
809
+ # alignment_collect method.
810
+ #
811
def alignment_slice(*arg)
  #(String-like)
  #(BioPerl) AlignI::slice like method
  # The arguments are passed unchanged to each sequence's slice method.
  alignment_collect { |seq| seq.slice(*arg) }
end
818
+ alias slice alignment_slice
819
+
820
+ # For each sequence, the 'subseq' method (Bio::Sequence#subseq is
821
+ # expected) is executed, and returns a new alignment as
822
+ # a Bio::Alignment::SequenceArray object.
823
+ #
824
+ # All sequences in the alignment are expected to be kind of
825
+ # Bio::Sequence objects.
826
+ #
827
+ # Unlike alignment_window method, the result alignment
828
+ # might contain nil.
829
+ #
830
+ # If you want to change return value's class, you should redefine
831
+ # alignment_collect method.
832
+ #
833
def alignment_subseq(*arg)
  #(original)
  # The arguments are passed unchanged to each sequence's subseq method.
  alignment_collect { |seq| seq.subseq(*arg) }
end
839
+ alias subseq alignment_subseq
840
+
841
+ # Concatenates the given alignment.
842
+ # <em>align</em> must have <tt>each_seq</tt>
843
+ # or <tt>each</tt> method.
844
+ #
845
+ # Returns self.
846
+ #
847
+ # Note that it is a destructive method.
848
+ #
849
+ # For Hash, please use it carefully because
850
+ # the order of the sequences is inconstant and
851
+ # key information is completely ignored.
852
+ #
853
def alignment_concat(align)
  targets = []
  each_seq { |seq| targets << seq }
  idx = 0
  yielded = nil
  # First try align.each_seq. An error raised before anything was
  # yielded is taken to mean each_seq is unusable, and align.each is
  # tried instead; an error raised after the first yield is re-raised.
  begin
    align.each_seq do |piece|
      yielded = true
      dest = targets[idx]
      dest.concat(piece) if dest and piece
      idx += 1
    end
    return self
  rescue NoMethodError, ArgumentError => err
    raise err if yielded
  end
  align.each do |piece|
    dest = targets[idx]
    dest.concat(piece) if dest and piece
    idx += 1
  end
  self
end
874
+ end #module EnumerableExtension
875
+
876
+ # ClustalWFormatter is a module to create ClustalW-formatted text
877
+ # from an alignment object.
878
+ #
879
+ # It will be obsoleted and the methods will be frequently changed.
880
module ClustalWFormatter
  # Check whether there are same names.
  #
  # Each name is cut at the first NUL or whitespace character,
  # truncated to <em>len</em> characters, and the characters
  # <tt>: ; , ( )</tt> are replaced by '_' before comparison.
  #
  # array:: names of the sequences (array of string)
  # len:: length to check (default:30)
  #
  # Returns a sorted array of the indexes of the conflicting names,
  # or false if every name is unique.
  def have_same_name?(array, len = 30)
    na30 = array.collect do |k|
      # A character class is needed here; without the brackets the
      # pattern would match only the literal sequence ":;,()".
      k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/[\:\;\,\(\)]/, '_').to_s
    end
    # sort the indexes by name so that equal names become adjacent
    na30idx = (0...(na30.size)).to_a
    na30idx.sort! do |x, y|
      na30[x] <=> na30[y]
    end
    y = nil
    dupidx = []
    na30idx.each do |x|
      if y and na30[y] == na30[x] then
        dupidx << y
        dupidx << x
      end
      y = x
    end
    if dupidx.size > 0 then
      dupidx.sort!
      dupidx.uniq!
      dupidx
    else
      false
    end
  end
  private :have_same_name?

  # Changes sequence names if there are conflicted names.
  #
  # CR, LF and NUL characters are always replaced by spaces.
  # Conflicting names first get the whitespace in their leading
  # <em>len</em> characters replaced by '_'. If conflicts remain,
  # every name is prefixed with its index and '_'.
  #
  # array:: names of the sequences (array of string)
  # len:: length to check (default:30)
  #
  # Returns a new array of names; <em>array</em> is not modified.
  def avoid_same_name(array, len = 30)
    na = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    if dupidx = have_same_name?(na, len)
      procs = [
        Proc.new { |s, i|
          s[0, len].to_s.gsub(/\s/, '_') + s[len..-1].to_s
        },
      ]
      procs.each do |pr|
        dupidx.each do |i|
          # Rename from the sanitized name so that the CR/LF/NUL
          # clean-up above is not lost.
          na[i] = pr.call(na[i].to_s, i)
        end
        dupidx = have_same_name?(na, len)
        break unless dupidx
      end
      if dupidx then
        na.each_with_index do |s, i|
          na[i] = "#{i}_#{s}"
        end
      end
    end
    na
  end
  private :avoid_same_name

  # Generates ClustalW-formatted text
  # seqs:: sequences (must be an alignment object)
  # names:: names of the sequences
  # options:: options
  #
  # Options read by this method:
  # :replace_space:: replace whitespace in names with '_'
  # :escape:: replace <tt>: ; , ( )</tt> in names with '_' (on unless set to false/nil)
  # :split:: cut names at the first whitespace (on unless set to false/nil)
  # :avoid_same_name:: rename conflicting names (on unless set to false/nil)
  # :gap_char:: gap character used in the output (default '-')
  # :type:: passed to seqs.match_line as :aa or :na when it matches
  # :match_line:: match line to use instead of seqs.match_line
  # :case:: 'lower' or 'upper' to change the case of the sequences
  # :seqnos:: append the running residue count to each sequence line
  def clustalw_formatter(seqs, names, options = {})
    #(original)
    aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
    len = seqs.seq_length
    sn = names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
    if options[:replace_space]
      sn.collect! { |x| x.gsub(/\s/, '_') }
    end
    if !options.has_key?(:escape) or options[:escape]
      sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
    end
    if !options.has_key?(:split) or options[:split]
      sn.collect! { |x| x.split(/\s/)[0].to_s }
    end
    if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
      sn = avoid_same_name(sn)
    end

    # long names get a wider name column and shorter sequence chunks
    if sn.find { |x| x.length > 10 } then
      seqwidth = 50
      namewidth = 30
      sep = ' ' * 6
    else
      seqwidth = 60
      namewidth = 10
      sep = ' ' * 6
    end
    seqregexp = Regexp.new("(.{1,#{seqwidth}})")
    gchar = (options[:gap_char] or '-')

    case options[:type].to_s
    when /protein/i, /aa/i
      mopt = { :type => :aa }
    when /na/i
      mopt = { :type => :na }
    else
      mopt = {}
    end
    # The line is padded and chopped in place below, so work on a
    # copy; otherwise a caller-supplied :match_line would be mutated.
    mline = (options[:match_line] or seqs.match_line(mopt)).to_s.dup

    aseqs = seqs.collect do |s|
      s.to_s.gsub(seqs.gap_regexp, gchar)
    end
    case options[:case].to_s
    when /lower/i
      aseqs.each { |s| s.downcase! }
    when /upper/i
      aseqs.each { |s| s.upcase! }
    end

    aseqs << mline
    aseqs.collect! do |s|
      # sn runs out of names for the match line, so snx is nil there
      snx = sn.shift
      head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth] + sep
      s << (gchar * (len - s.length))
      s.gsub!(seqregexp, "\\1\n")
      a = s.split(/^/)
      if options[:seqnos] and snx then
        i = 0
        a.each do |x|
          x.chomp!
          l = x.tr(gchar, '').length
          i += l
          x.concat(l > 0 ? " #{i}\n" : "\n")
        end
      end
      a.collect { |x| head + x }
    end
    lines = (len + seqwidth - 1).div(seqwidth)
    lines.times do
      aln << "\n"
      aseqs.each { |a| aln << a.shift }
    end
    aln.join('')
  end
  private :clustalw_formatter
end #module ClustalWFormatter
1028
+
1029
+
1030
+ # Bio::Alignment::ArrayExtension is a set of useful methods for
1031
+ # multiple sequence alignment.
1032
+ # It is designed to be extended to array objects or
1033
+ # included in your own classes which inherit Array.
1034
+ # (It can also be included in Array, though not recommended.)
1035
+ #
1036
+ # It possesses all methods defined in EnumerableExtension.
1037
+ # For usage of methods, please refer to EnumerableExtension.
1038
module ArrayExtension
  include EnumerableExtension

  # Iterates over each sequences.
  # Yields a sequence.
  #
  # It simply forwards the block to each.
  def each_seq(&block) #:yields: seq
    each(&block)
  end

  include ClustalWFormatter
  # Returns a string of Clustal W formatted text of the alignment.
  # The sequences are named by their indexes (0, 1, 2, ...).
  def to_clustal(options = {})
    index_names = (0...(self.size)).to_a
    clustalw_formatter(self, index_names, options)
  end
end #module ArrayExtension
1055
+
1056
+ # Bio::Alignment::HashExtension is a set of useful methods for
1057
+ # multiple sequence alignment.
1058
+ # It is designed to be extended to hash objects or
1059
+ # included in your own classes which inherit Hash.
1060
+ # (It can also be included in Hash, though not recommended.)
1061
+ #
1062
+ # It possesses all methods defined in EnumerableExtension.
1063
+ # For usage of methods, please refer to EnumerableExtension.
1064
+ #
1065
+ # Because SequenceHash#alignment_collect is redefined,
1066
+ # some methods' return value's class are changed to
1067
+ # SequenceHash instead of SequenceArray.
1068
+ #
1069
+ # Because the order of the objects in a hash is inconstant,
1070
+ # some methods strictly affected with the order of objects
1071
+ # might not work correctly,
1072
+ # e.g. EnumerableExtension#convert_match and #convert_unmatch.
1073
module HashExtension
  include EnumerableExtension

  # Iterates over each sequences.
  # Yields a sequence.
  #
  # It simply forwards the block to each_value.
  def each_seq(&block) #:yields: seq
    each_value(&block)
  end

  # Runs the block for each sequence and stores each result under
  # the same key in a new Bio::Alignment::SequenceHash object,
  # which is returned. The properties of the receiver are copied
  # to the new object.
  #
  # Note that it would be redefined if you want to change
  # return value's class.
  #
  def alignment_collect
    collected = SequenceHash.new
    collected.set_all_property(get_all_property)
    each_pair do |key, str|
      collected.store(key, yield(str))
    end
    collected
  end

  # Concatenates the given alignment.
  # If <em>align</em> is a Hash (or SequenceHash),
  # sequences of same keys are concatenated.
  # Otherwise, <em>align</em> must have <tt>each_seq</tt>
  # or <tt>each</tt> method and
  # works same as EnumerableExtension#alignment_concat.
  #
  # Returns self.
  #
  # Note that it is a destructive method.
  #
  def alignment_concat(align)
    yielded = nil
    # 1st attempt: key based concatenation through each_pair.
    # An error raised before anything was yielded moves on to the
    # next attempt; an error after the first yield is re-raised.
    begin
      align.each_pair do |key, seq|
        yielded = true
        dest = self[key]
        dest.concat(seq) if dest
      end
      return self
    rescue NoMethodError, ArgumentError => err
      raise err if yielded
    end
    # 2nd and 3rd attempts: order based concatenation.
    targets = values
    idx = 0
    begin
      align.each_seq do |seq|
        yielded = true
        dest = targets[idx]
        dest.concat(seq) if dest and seq
        idx += 1
      end
      return self
    rescue NoMethodError, ArgumentError => err
      raise err if yielded
    end
    align.each do |seq|
      dest = targets[idx]
      dest.concat(seq) if dest and seq
      idx += 1
    end
    self
  end

  include ClustalWFormatter
  # Returns a string of Clustal W formatted text of the alignment.
  # The keys are used as the sequence names.
  def to_clustal(options = {})
    names = self.keys
    seqs = SequenceArray.new
    names.each { |name| seqs << self[name] }
    clustalw_formatter(seqs, names, options)
  end
end #module HashExtension
1154
+
1155
+ # Bio::Alignment::SequenceArray is a container class of
1156
+ # multiple sequence alignment.
1157
+ # Since it inherits Array, it acts completely same as Array.
1158
+ # In addition, methods defined in ArrayExtension and EnumerableExtension
1159
+ # can be used.
1160
class SequenceArray < Array
  # adds the alignment methods (each_seq, to_clustal, ...)
  include ArrayExtension
end #class SequenceArray
1163
+
1164
+ # Bio::Alignment::SequenceHash is a container class of
1165
+ # multiple sequence alignment.
1166
+ # Since it inherits Hash, it acts completely same as Hash.
1167
+ # In addition, methods defined in HashExtension and EnumerableExtension
1168
+ # can be used.
1169
class SequenceHash < Hash
  # adds the alignment methods (each_seq, alignment_collect, ...)
  include HashExtension
end #class SequenceHash
1172
+
1173
+ # Bio::Alignment::OriginalPrivate is a set of private methods
1174
+ # for Bio::Alignment::OriginalAlignment.
1175
module OriginalPrivate

  # Gets the sequence from given object.
  #
  # A Bio::Sequence is returned as is. Otherwise the methods
  # seq, naseq and aaseq are tried in this order and the first
  # true value is returned. If none of them gives a value,
  # the object itself is returned.
  def extract_seq(obj)
    return obj if obj.is_a?(Bio::Sequence)
    [ :seq, :naseq, :aaseq ].each do |meth|
      found = begin
                obj.send(meth)
              rescue NameError, ArgumentError
                # the accessor is missing or needs arguments; try the next one
                nil
              end
      return found if found
    end
    obj
  end
  module_function :extract_seq

  # Gets the name or the definition of the sequence from given object.
  #
  # The methods definition and entry_id are tried in this order.
  # Returns nil if neither gives a value.
  def extract_key(obj)
    [ :definition, :entry_id ].each do |meth|
      found = begin
                obj.send(meth)
              rescue NameError, ArgumentError
                nil
              end
      return found if found
    end
    nil
  end
  module_function :extract_key
end #module OriginalPrivate
1212
+
1213
+ # Bio::Alignment::OriginalAlignment is
1214
+ # the BioRuby original multiple sequence alignment container class.
1215
+ # It includes HashExtension.
1216
+ #
1217
+ # It is recommended only to use methods defined in EnumerableExtension
1218
+ # (and the each_seq method).
1219
+ # The method only defined in this class might be obsoleted in the future.
1220
+ #
1221
+ class OriginalAlignment
1222
+
1223
+ include Enumerable
1224
+ include HashExtension
1225
+ include OriginalPrivate
1226
+
1227
+ # Read files and creates a new alignment object.
1228
+ #
1229
+ # It will be obsoleted.
1230
def self.readfiles(*files)
  require 'bio/io/flatfile'
  # Every file is opened with Bio::FlatFile and its entries are
  # added to one new alignment object.
  files.inject(self.new) do |aln, path|
    Bio::FlatFile.open(nil, path) do |ff|
      aln.add_sequences(ff)
    end
    aln
  end
end
1240
+
1241
+ # Creates a new alignment object from given arguments.
1242
+ #
1243
+ # It will be obsoleted.
1244
def self.new2(*arg)
  # the arguments are handed to new as a single array
  new(arg)
end
1247
+
1248
+ # Creates a new alignment object.
1249
+ # <em>seqs</em> may be one of follows:
1250
+ # an array of sequences (or strings),
1251
+ # an array of sequence database objects,
1252
+ # an alignment object.
1253
def initialize(seqs = [])
  # @keys keeps the order of the keys; @seqs maps key to sequence
  @keys = []
  @seqs = {}
  add_sequences(seqs)
end
1258
+
1259
+ # If <em>x</em> is the same value, returns true.
1260
+ # Otherwise, returns false.
1261
def ==(x)
  #(original)
  # Only the key => sequence hashes are compared.
  return false unless x.is_a?(self.class)
  self.to_hash == x.to_hash
end
1269
+
1270
+ # convert to hash
1271
def to_hash
  #(Hash-like)
  # Returns the internal hash itself, not a copy.
  @seqs
end
1275
+
1276
+ # Adds sequences to the alignment.
1277
+ # <em>seqs</em> may be one of follows:
1278
+ # an array of sequences (or strings),
1279
+ # an array of sequence database objects,
1280
+ # an alignment object.
1281
def add_sequences(seqs)
  if block_given?
    # the block must return [ sequence, key ] for each element
    seqs.each do |item|
      seq, key = yield item
      self.store(key, seq)
    end
  elsif seqs.is_a?(self.class)
    # same class: keys and sequences are stored as they are
    seqs.each_pair { |key, seq| self.store(key, seq) }
  elsif seqs.respond_to?(:each_pair)
    # hash-like: keys are kept, sequences are extracted from the values
    seqs.each_pair do |key, item|
      self.store(key, extract_seq(item))
    end
  else
    # anything else: both sequence and key are extracted from each element
    seqs.each do |item|
      seq = extract_seq(item)
      key = extract_key(item)
      self.store(key, seq)
    end
  end
  self
end
1307
+
1308
+ # identifiers (or definitions or names) of the sequences
1309
+ attr_reader :keys
1310
+
1311
+ # stores a sequences with the name
1312
+ # key:: name of the sequence
1313
+ # seq:: sequence
1314
def __store__(key, seq)
  #(Hash-like)
  # The key is taken back from a one-entry hash so that @keys gets
  # the same key object that is stored in @seqs.
  entry = { key => seq }
  @keys.push(entry.keys.first)
  @seqs.update(entry)
  seq
end
1321
+
1322
+ # stores a sequence with <em>key</em>
1323
+ # (name or definition of the sequence).
1324
+ # Unlike <tt>__store__</tt> method, the method doesn't allow
1325
+ # same keys.
1326
+ # If the key is nil or already used, an unused serial number is
1327
+ # assigned as the key instead. Returns the key actually used.
1328
def store(key, seq)
  #(Hash-like) returns key instead of seq
  # don't allow same key:
  # the new key is discarded, while the existing key is preserved.
  key = nil if @seqs.has_key?(key)
  unless key
    # assign the next unused serial number as the key
    @serial = 0 unless defined?(@serial)
    @serial = @seqs.size if @seqs.size > @serial
    @serial += 1 while @seqs.has_key?(@serial)
    key = @serial
  end
  self.__store__(key, seq)
  key
end
1348
+
1349
+ # Reconstructs internal data structure.
1350
+ # (Like Hash#rehash)
1351
def rehash
  @seqs.rehash
  remaining = @seqs.keys
  # Keep, in their current order, only the keys that are still in
  # @seqs (a repeated key is kept once) ...
  @keys.collect! { |k| remaining.delete(k) }
  @keys.compact!
  # ... and append the keys that @seqs has but @keys did not have.
  @keys.concat(remaining)
  self
end
1362
+
1363
+ # Prepends seq (with key) to the front of the alignment.
1364
+ # (Like Array#unshift)
1365
def unshift(key, seq)
  #(Array-like)
  self.store(key, seq)
  # store appended the key actually used; move it to the front
  newest = @keys.pop
  @keys.unshift(newest)
  newest
end
1372
+
1373
+ # Removes the first sequence in the alignment and
1374
+ # returns [ key, seq ].
1375
def shift
  first_key = @keys.shift
  # nil when there is no key left
  return nil unless first_key
  [ first_key, @seqs.delete(first_key) ]
end
1384
+
1385
+ # Gets the <em>n</em>-th sequence.
1386
+ # If not found, returns nil.
1387
def order(n)
  #(original)
  # look up the n-th key, then the sequence stored under it
  nth_key = @keys[n]
  @seqs[nth_key]
end
1391
+
1392
+ # Removes the sequence whose key is <em>key</em>.
1393
+ # Returns the removed sequence.
1394
+ # If not found, returns nil.
1395
def delete(key)
  #(Hash-like)
  # drop the key from the order list, then from the hash;
  # the value removed from the hash is the return value
  @keys.delete(key)
  removed = @seqs.delete(key)
  removed
end
1400
+
1401
+ # Returns sequences. (Like Hash#values)
1402
def values
  #(Hash-like)
  # sequences in the order of @keys
  @seqs.values_at(*@keys)
end
1406
+
1407
+ # Adds a sequence without key.
1408
+ # The key is automatically determined.
1409
def <<(seq)
  #(Array-like)
  # a nil key lets store choose the key
  store(nil, seq)
  self
end
1414
+
1415
+ # Gets a sequence. (Like Hash#[])
1416
def [](*arg)
  #(Hash-like)
  # the arguments are passed unchanged to the internal hash
  @seqs.[](*arg)
end
1420
+
1421
+ # Number of sequences in the alignment.
1422
def size
  #(Hash&Array-like)
  # counts the entries of the internal hash
  @seqs.length
end
1426
+
1427
+ # If the key exists, returns true. Otherwise, returns false.
1428
+ # (Like Hash#has_key?)
1429
def has_key?(key)
  #(Hash-like)
  # asks the internal hash
  @seqs.key?(key)
end
1433
+
1434
+ # Iterates over each sequence.
1435
+ # (Like Array#each)
1436
def each
  #(Array-like)
  # sequences are yielded in the order of @keys
  @keys.each { |key| yield(@seqs[key]) }
end
1442
+ alias each_seq each
1443
+
1444
+ # Iterates over each key and sequence.
1445
+ # (Like Hash#each_pair)
1446
def each_pair
  #(Hash-like)
  # pairs are yielded in the order of @keys
  @keys.each { |key| yield(key, @seqs[key]) }
end
1452
+
1453
+ # Iterates over each sequence, replacing the sequence with the
1454
+ # value returned by the block.
1455
def collect!
  #(Array-like)
  # the keys stay as they are; only the stored sequences are replaced
  @keys.each do |key|
    replacement = yield(@seqs[key])
    @seqs[key] = replacement
  end
end
1461
+
1462
+ ###--
1463
+ ### note that 'collect' and 'to_a' is defined in Enumerable
1464
+ ###
1465
+ ### instance-variable-related methods
1466
+ ###++
1467
+
1468
+ # Creates new alignment. Internal use only.
1469
def new(*arg)
  # builds an object of the receiver's class and copies the
  # receiver's properties to it
  fresh = self.class.new(*arg)
  fresh.set_all_property(get_all_property)
  fresh
end
1474
+ protected :new
1475
+
1476
+ # Duplicates the alignment
1477
def dup
  #(Hash-like)
  # the copy is built by passing the receiver to new
  copy = self.new(self)
  copy
end
1481
+
1482
+ #--
1483
+ # methods below should not access instance variables
1484
+ #++
1485
+
1486
+ # Merges given alignment and returns a new alignment.
1487
def merge(*other, &block)
  #(Hash-like)
  # Works on a copy of the receiver. A block, if given, is passed
  # on to merge! (which yields key, existing sequence and new
  # sequence for keys present on both sides) instead of being
  # silently ignored.
  na = self.new(self)
  na.merge!(*other, &block)
  na
end
1493
+
1494
+ # Merge given alignment.
1495
+ # Note that it is destructive method.
1496
def merge!(*other)
  #(Hash-like)
  other.each do |aln|
    aln.each_pair do |k, s|
      if !self.has_key?(k)
        self.store(k, s)
      elsif block_given?
        # conflict with a block: the block decides the value, which
        # is written straight into the hash so the order is unchanged
        merged = yield k, self[k], s
        self.to_hash.store(k, merged)
      else
        # conflict without a block: the old entry is removed and
        # the new one is stored at the end
        self.delete(k)
        self.store(k, s)
      end
    end
  end
  self
end
1519
+
1520
+ # Returns the key for a given sequence. If not found, returns nil.
1521
def index(seq)
  #(Hash-like)
  # Sequences of the same class are compared with ==;
  # otherwise their string forms are compared.
  # The key is returned from inside the block: a block parameter
  # does not overwrite an outer local variable (Ruby 1.9 and later),
  # so the result must not be carried out through one.
  self.each_pair do |k, s|
    matched = if s.class == seq.class then
                (s == seq)
              else
                (s.to_s == seq.to_s)
              end
    return k if matched
  end
  nil
end
1534
+
1535
+ # Sequences in the alignment are duplicated.
1536
+ # If keys are given to the argument, sequences of given keys are
1537
+ # duplicated.
1538
+ #
1539
+ # It will be obsoleted.
1540
def isolate(*arg)
  #(original)
  # Each affected sequence is replaced by seqclass.new(sequence).
  if arg.size == 0 then
    self.collect! do |s|
      seqclass.new(s)
    end
  else
    arg.each do |k|
      if self.has_key?(k) then
        # Replace the value in place. The former code referenced an
        # undefined variable here; a delete followed by store would
        # also move the sequence to the end of the alignment.
        self.to_hash.store(k, seqclass.new(self[k]))
      end
    end
  end
  self
end
1556
+
1557
+ # Iterates over each sequence and each results running block
1558
+ # are collected and returns a new alignment.
1559
+ #
1560
+ # The method name 'collect_align' will be obsoleted.
1561
+ # Please use 'alignment_collect' instead.
1562
def alignment_collect
  #(original)
  # The result has the receiver's class, properties and keys;
  # each value is what the block returned for the sequence.
  collected = self.class.new
  collected.set_all_property(get_all_property)
  self.each_pair do |key, seq|
    collected.store(key, yield(seq))
  end
  collected
end
1571
+ alias collect_align alignment_collect
1572
+
1573
+ # Removes empty sequences or nil in the alignment.
1574
+ # (Like Array#compact!)
1575
def compact!
  #(Array-like)
  # Collect the keys first and delete afterwards, so that nothing
  # is removed while each_pair is running.
  doomed = []
  self.each_pair do |key, seq|
    doomed << key if !seq or seq.empty?
  end
  doomed.each { |key| self.delete(key) }
  # returns the removed keys, or nil if nothing was removed
  doomed.empty? ? nil : doomed
end
1588
+
1589
+ # Removes empty sequences or nil and returns new alignment.
1590
+ # (Like Array#compact)
1591
def compact
  #(Array-like)
  # compact! runs on a copy; the copy is returned even when
  # compact! removed nothing
  copy = self.dup
  copy.compact!
  copy
end
1597
+
1598
+ # Adds a sequence to the alignment.
1599
+ # Returns the key under which the sequence was stored.
1600
+ # If the key is nil or already used, a serial number is used as the key.
1601
+ #
1602
+ # It resembles BioPerl's AlignI::add_seq method.
1603
def add_seq(seq, key = nil)
  #(BioPerl) AlignI::add_seq like method
  # A Bio::Sequence is stored as it is with the given key.
  return self.store(key, seq) if seq.is_a?(Bio::Sequence)
  # Otherwise the sequence, and the key when none was given,
  # are extracted from the object.
  extracted = extract_seq(seq)
  key = extract_key(seq) unless key
  self.store(key, extracted)
end
1612
+
1613
+ # Removes given sequence from the alignment.
1614
+ # Returns removed sequence. If nothing removed, returns nil.
1615
+ #
1616
+ # It resembles BioPerl's AlignI::remove_seq.
1617
def remove_seq(seq)
  #(BioPerl) AlignI::remove_seq like method
  # look up the key of the sequence, then delete by that key
  found_key = self.index(seq)
  found_key ? self.delete(found_key) : nil
end
1625
+
1626
+ # Removes sequences from the alignment by given keys.
1627
+ # Returns an alignment object consists of removed sequences.
1628
+ #
1629
+ # It resembles BioPerl's AlignI::purge method.
1630
def purge(*arg)
  #(BioPerl) AlignI::purge like method
  removed = self.new
  arg.each do |key|
    # keys without a (true) sequence are skipped
    next unless self[key]
    removed.store(key, self.delete(key))
  end
  removed
end
1640
+
1641
+ # If block is given, it acts like Array#select (Enumerable#select).
1642
+ # Returns a new alignment containing all sequences of the alignment
1643
+ # for which return value of given block is not false nor nil.
1644
+ #
1645
+ # If no block is given, it acts like the BioPerl's AlignI::select.
1646
+ # Returns a new alignment containing sequences of given keys.
1647
+ #
1648
+ # The BioPerl's AlignI::select-like action will be obsoleted.
1649
def select(*arg)
  #(original)
  na = self.new
  if block_given? then
    # 'arg' is ignored
    # nearly same action as Array#select (Enumerable#select)
    # The block must be given to each_pair itself: each_pair yields
    # and so cannot be called without a block.
    self.each_pair do |k, s|
      na.store(k, s) if yield(s)
    end
  else
    # BioPerl's AlignI::select like function
    arg.each do |k|
      if s = self[k] then
        na.store(k, s)
      end
    end
  end
  na
end
1668
+
1669
+ # The method name <tt>slice</tt> will be obsoleted.
1670
+ # Please use <tt>alignment_slice</tt> instead.
1671
+ alias slice alignment_slice
1672
+
1673
+ # The method name <tt>subseq</tt> will be obsoleted.
1674
+ # Please use <tt>alignment_subseq</tt> instead.
1675
+ alias subseq alignment_subseq
1676
+
1677
+ # Not-destructive version of alignment_normalize!.
1678
+ # Returns a new alignment.
1679
def normalize
  #(original)
  na = self.dup
  # dup shares the sequence objects with the receiver and
  # alignment_normalize! appends to them in place, so the sequences
  # are duplicated first (as rstrip, lstrip and strip do).
  na.isolate
  na.alignment_normalize!
  na
end
1685
+
1686
+ # Not-destructive version of alignment_rstrip!.
1687
+ # Returns a new alignment.
1688
def rstrip
  #(String-like)
  copy = self.dup
  # give the copy its own sequence objects before stripping in place
  copy.isolate
  copy.alignment_rstrip!
  copy
end
1695
+
1696
+ # Not-destructive version of alignment_lstrip!.
1697
+ # Returns a new alignment.
1698
def lstrip
  #(String-like)
  copy = self.dup
  # give the copy its own sequence objects before stripping in place
  copy.isolate
  copy.alignment_lstrip!
  copy
end
1705
+
1706
+ # Not-destructive version of alignment_strip!.
1707
+ # Returns a new alignment.
1708
def strip
  #(String-like)
  copy = self.dup
  # give the copy its own sequence objects before stripping in place
  copy.isolate
  copy.alignment_strip!
  copy
end
1715
+
1716
+ # Not-destructive version of remove_all_gaps!.
1717
+ # Returns a new alignment.
1718
+ #
1719
+ # The method name 'remove_gap' will be obsoleted.
1720
+ # Please use 'remove_all_gaps' instead.
1721
def remove_all_gaps
  #(original)
  copy = self.dup
  # give the copy its own sequence objects before editing in place
  copy.isolate
  copy.remove_all_gaps!
  copy
end
1728
+
1729
+ # Concatenates a string or an alignment.
1730
+ # Returns self.
1731
+ #
1732
+ # Note that the method will be obsoleted.
1733
+ # Please use <tt>each_seq { |s| s << str }</tt> for concatenating
1734
+ # a string and
1735
+ # <tt>alignment_concat(aln)</tt> for concatenating an alignment.
1736
def concat(aln)
  #(String-like)
  # anything that is not string-like is treated as an alignment
  return alignment_concat(aln) unless aln.respond_to?(:to_str)
  # string-like: appended to every sequence
  self.each { |seq| seq << aln }
  self
end
1747
+
1748
+ # Replace the specified region of the alignment to aln.
1749
+ # aln:: String or Bio::Alignment object
1750
+ # arg:: same format as String#slice
1751
+ #
1752
+ # It will be obsoleted.
1753
def replace_slice(aln, *arg)
  #(original)
  if aln.respond_to?(:to_str)
    # string-like: the same text replaces the region of every sequence
    self.each { |seq| seq[*arg] = aln }
  elsif aln.is_a?(self.class)
    # same class: sequences are paired by key
    aln.each_pair { |key, piece| self[key][*arg] = piece }
  else
    # anything else with each: sequences are paired by position
    pos = 0
    aln.each do |piece|
      self.order(pos)[*arg] = piece
      pos += 1
    end
  end
  self
end
1772
+
1773
+ # Performs multiple alignment by using external program.
1774
def do_align(factory)
  # The sequences are sent under their positions (0, 1, 2, ...)
  # instead of their real keys.
  query = self.class.new
  self.size.times { |i| query.store(i, self.order(i)) }
  aligned = factory.query(query).alignment
  # every position must come back, keyed by its string form
  query.keys.each do |k|
    unless aligned[k.to_s] then
      raise 'alignment result is inconsistent with input data'
    end
  end
  # put the aligned sequences back under the original keys
  result = self.new
  query.keys.each do |k|
    result.store(self.keys[k], aligned[k.to_s])
  end
  result
end
1790
+
1791
+ # Convert to fasta format and returns an array of strings.
1792
+ #
1793
+ # It will be obsoleted.
1794
def to_fasta_array(*arg)
  #(original)
  # A leading Integer argument is the line width; the next argument,
  # if any, is the options hash (:width, :avoid_same_name).
  width = arg[0].is_a?(Integer) ? arg.shift : nil
  options = (arg.shift or {})
  width ||= options[:width]
  names = if options[:avoid_same_name]
            avoid_same_name(self.keys, 30)
          else
            self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
          end
  self.collect do |s|
    body = if width
             # a newline is inserted after every chunk of up to width characters
             s.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
           else
             s.to_s + "\n"
           end
    ">#{names.shift}\n" + body
  end
end
1817
+
1818
+ # Converts to fasta format and returns an array of FastaFormat objects.
1819
+ #
1820
+ # It will be obsoleted.
1821
def to_fastaformat_array(*arg)
  #(original)
  require 'bio/db/fasta'
  # each fasta text from to_fasta_array is wrapped in a FastaFormat object
  entries = self.to_fasta_array(*arg)
  entries.collect! { |text| Bio::FastaFormat.new(text) }
  entries
end
1830
+
1831
+ # Converts to fasta format and returns a string.
1832
+ #
1833
+ # The specification of the argument will be changed.
1834
def to_fasta(*arg)
  #(original)
  # the entries from to_fasta_array are joined without a separator
  entries = self.to_fasta_array(*arg)
  entries.join('')
end
1838
+
1839
+ include ClustalWFormatter
1840
+ # Returns a string of Clustal W formatted text of the alignment.
1841
def to_clustal(options = {})
  # the keys are used as the sequence names
  names = self.keys
  clustalw_formatter(self, names, options)
end
1844
+
1845
+ # The method name <tt>consensus</tt> will be obsoleted.
1846
+ # Please use <tt>consensus_string</tt> instead.
1847
+ alias consensus consensus_string
1848
+ end #class OriginalAlignment
1849
+
1850
+ # Bio::Alignment::GAP is a set of class methods for
1851
+ # gap-related position translation.
1852
module GAP
  # Translates a position in the gapped sequence into the position
  # in the sequence with the gaps removed.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position with gaps
  #<em>gap_regexp</em>:: regular expression to specify gaps
  def ungapped_pos(seq, pos, gap_regexp)
    # number of non-gap characters up to and including pos
    residues = seq[0..pos].gsub(gap_regexp, '').length
    residues > 0 ? residues - 1 : residues
  end
  module_function :ungapped_pos

  # Translates a position in the sequence without gaps into the
  # position in the gapped sequence.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position without gaps
  #<em>gap_regexp</em>:: regular expression to specify gaps
  def gapped_pos(seq, pos, gap_regexp)
    olen = seq.gsub(gap_regexp, '').length
    pos = olen if pos >= olen
    # a negative position counts from the end
    pos = olen + pos if pos < 0

    # Advance through the gapped sequence in chunks: each chunk is as
    # long as the number of non-gap characters still to be found.
    cursor = 0
    wanted = pos + 1
    while wanted > 0 and cursor < seq.length
      found = seq[cursor, wanted].gsub(gap_regexp, '').length
      cursor += wanted
      wanted -= found
    end
    cursor > 0 ? cursor - 1 : cursor
  end
  module_function :gapped_pos
end # module GAP
1885
+
1886
+ # creates a new Bio::Alignment::OriginalAlignment object.
1887
+ # Please refer document of OriginalAlignment.new.
1888
def self.new(*args)
  # all arguments are handed to OriginalAlignment.new
  OriginalAlignment.new(*args)
end
1891
+
1892
+ # creates a new Bio::Alignment::OriginalAlignment object.
1893
+ # Please refer document of OriginalAlignment.new2.
1894
def self.new2(*args)
  # all arguments are handed to OriginalAlignment.new2
  OriginalAlignment.new2(*args)
end
1897
+
1898
+ # creates a new Bio::Alignment::OriginalAlignment object.
1899
+ # Please refer document of OriginalAlignment.readfiles.
1900
def self.readfiles(*files)
  # all file names are handed to OriginalAlignment.readfiles
  OriginalAlignment.readfiles(*files)
end
1903
+ end #module Alignment
1904
+
1905
+ end #module Bio
1906
+