wwood-bioruby 1.2.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (386) hide show
  1. data/README.rdoc +205 -0
  2. data/README_DEV.rdoc +285 -0
  3. data/VERSION.yml +4 -0
  4. data/bin/bioruby +44 -0
  5. data/bin/br_biofetch.rb +47 -0
  6. data/bin/br_bioflat.rb +293 -0
  7. data/bin/br_biogetseq.rb +45 -0
  8. data/bin/br_pmfetch.rb +421 -0
  9. data/lib/bio.rb +306 -0
  10. data/lib/bio/alignment.rb +2518 -0
  11. data/lib/bio/appl/bl2seq/report.rb +334 -0
  12. data/lib/bio/appl/blast.rb +505 -0
  13. data/lib/bio/appl/blast/ddbj.rb +142 -0
  14. data/lib/bio/appl/blast/format0.rb +1438 -0
  15. data/lib/bio/appl/blast/format8.rb +83 -0
  16. data/lib/bio/appl/blast/genomenet.rb +263 -0
  17. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  18. data/lib/bio/appl/blast/remote.rb +105 -0
  19. data/lib/bio/appl/blast/report.rb +767 -0
  20. data/lib/bio/appl/blast/rexml.rb +144 -0
  21. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  22. data/lib/bio/appl/blast/wublast.rb +635 -0
  23. data/lib/bio/appl/blast/xmlparser.rb +236 -0
  24. data/lib/bio/appl/blat/report.rb +530 -0
  25. data/lib/bio/appl/clustalw.rb +219 -0
  26. data/lib/bio/appl/clustalw/report.rb +152 -0
  27. data/lib/bio/appl/emboss.rb +203 -0
  28. data/lib/bio/appl/fasta.rb +235 -0
  29. data/lib/bio/appl/fasta/format10.rb +325 -0
  30. data/lib/bio/appl/gcg/msf.rb +212 -0
  31. data/lib/bio/appl/gcg/seq.rb +195 -0
  32. data/lib/bio/appl/genscan/report.rb +552 -0
  33. data/lib/bio/appl/hmmer.rb +126 -0
  34. data/lib/bio/appl/hmmer/report.rb +683 -0
  35. data/lib/bio/appl/iprscan/report.rb +374 -0
  36. data/lib/bio/appl/mafft.rb +259 -0
  37. data/lib/bio/appl/mafft/report.rb +226 -0
  38. data/lib/bio/appl/muscle.rb +52 -0
  39. data/lib/bio/appl/paml/baseml.rb +95 -0
  40. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  41. data/lib/bio/appl/paml/codeml.rb +242 -0
  42. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  43. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  44. data/lib/bio/appl/paml/common.rb +348 -0
  45. data/lib/bio/appl/paml/common_report.rb +38 -0
  46. data/lib/bio/appl/paml/yn00.rb +103 -0
  47. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  48. data/lib/bio/appl/phylip/alignment.rb +133 -0
  49. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  50. data/lib/bio/appl/probcons.rb +41 -0
  51. data/lib/bio/appl/psort.rb +548 -0
  52. data/lib/bio/appl/psort/report.rb +542 -0
  53. data/lib/bio/appl/pts1.rb +263 -0
  54. data/lib/bio/appl/sim4.rb +124 -0
  55. data/lib/bio/appl/sim4/report.rb +485 -0
  56. data/lib/bio/appl/sosui/report.rb +151 -0
  57. data/lib/bio/appl/spidey/report.rb +593 -0
  58. data/lib/bio/appl/targetp/report.rb +267 -0
  59. data/lib/bio/appl/tcoffee.rb +55 -0
  60. data/lib/bio/appl/tmhmm/report.rb +231 -0
  61. data/lib/bio/command.rb +593 -0
  62. data/lib/bio/compat/features.rb +157 -0
  63. data/lib/bio/compat/references.rb +128 -0
  64. data/lib/bio/data/aa.rb +353 -0
  65. data/lib/bio/data/codontable.rb +722 -0
  66. data/lib/bio/data/na.rb +223 -0
  67. data/lib/bio/db.rb +329 -0
  68. data/lib/bio/db/aaindex.rb +357 -0
  69. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  70. data/lib/bio/db/biosql/sequence.rb +508 -0
  71. data/lib/bio/db/embl/common.rb +352 -0
  72. data/lib/bio/db/embl/embl.rb +500 -0
  73. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  74. data/lib/bio/db/embl/format_embl.rb +190 -0
  75. data/lib/bio/db/embl/sptr.rb +1283 -0
  76. data/lib/bio/db/embl/swissprot.rb +42 -0
  77. data/lib/bio/db/embl/trembl.rb +41 -0
  78. data/lib/bio/db/embl/uniprot.rb +42 -0
  79. data/lib/bio/db/fantom.rb +597 -0
  80. data/lib/bio/db/fasta.rb +410 -0
  81. data/lib/bio/db/fasta/defline.rb +532 -0
  82. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  83. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  84. data/lib/bio/db/genbank/common.rb +307 -0
  85. data/lib/bio/db/genbank/ddbj.rb +22 -0
  86. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  87. data/lib/bio/db/genbank/genbank.rb +250 -0
  88. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  89. data/lib/bio/db/genbank/genpept.rb +60 -0
  90. data/lib/bio/db/genbank/refseq.rb +18 -0
  91. data/lib/bio/db/gff.rb +1846 -0
  92. data/lib/bio/db/go.rb +481 -0
  93. data/lib/bio/db/kegg/brite.rb +41 -0
  94. data/lib/bio/db/kegg/compound.rb +131 -0
  95. data/lib/bio/db/kegg/drug.rb +98 -0
  96. data/lib/bio/db/kegg/enzyme.rb +148 -0
  97. data/lib/bio/db/kegg/expression.rb +155 -0
  98. data/lib/bio/db/kegg/genes.rb +263 -0
  99. data/lib/bio/db/kegg/genome.rb +241 -0
  100. data/lib/bio/db/kegg/glycan.rb +166 -0
  101. data/lib/bio/db/kegg/keggtab.rb +357 -0
  102. data/lib/bio/db/kegg/kgml.rb +256 -0
  103. data/lib/bio/db/kegg/orthology.rb +136 -0
  104. data/lib/bio/db/kegg/reaction.rb +82 -0
  105. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  106. data/lib/bio/db/lasergene.rb +209 -0
  107. data/lib/bio/db/litdb.rb +107 -0
  108. data/lib/bio/db/medline.rb +326 -0
  109. data/lib/bio/db/nbrf.rb +191 -0
  110. data/lib/bio/db/newick.rb +658 -0
  111. data/lib/bio/db/nexus.rb +1854 -0
  112. data/lib/bio/db/pdb.rb +29 -0
  113. data/lib/bio/db/pdb/atom.rb +77 -0
  114. data/lib/bio/db/pdb/chain.rb +210 -0
  115. data/lib/bio/db/pdb/chemicalcomponent.rb +224 -0
  116. data/lib/bio/db/pdb/model.rb +148 -0
  117. data/lib/bio/db/pdb/pdb.rb +1911 -0
  118. data/lib/bio/db/pdb/residue.rb +176 -0
  119. data/lib/bio/db/pdb/utils.rb +399 -0
  120. data/lib/bio/db/prosite.rb +597 -0
  121. data/lib/bio/db/rebase.rb +456 -0
  122. data/lib/bio/db/soft.rb +404 -0
  123. data/lib/bio/db/transfac.rb +375 -0
  124. data/lib/bio/db/url.rb +42 -0
  125. data/lib/bio/feature.rb +139 -0
  126. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  127. data/lib/bio/io/biosql/bioentry.rb +29 -0
  128. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  129. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  130. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  131. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  132. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  133. data/lib/bio/io/biosql/biosequence.rb +11 -0
  134. data/lib/bio/io/biosql/comment.rb +7 -0
  135. data/lib/bio/io/biosql/config/database.yml +20 -0
  136. data/lib/bio/io/biosql/dbxref.rb +13 -0
  137. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  138. data/lib/bio/io/biosql/location.rb +32 -0
  139. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  140. data/lib/bio/io/biosql/ontology.rb +10 -0
  141. data/lib/bio/io/biosql/reference.rb +9 -0
  142. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  143. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  144. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  145. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  146. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  147. data/lib/bio/io/biosql/taxon.rb +12 -0
  148. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  149. data/lib/bio/io/biosql/term.rb +27 -0
  150. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  151. data/lib/bio/io/biosql/term_path.rb +12 -0
  152. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  153. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  154. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  155. data/lib/bio/io/das.rb +461 -0
  156. data/lib/bio/io/dbget.rb +194 -0
  157. data/lib/bio/io/ddbjxml.rb +638 -0
  158. data/lib/bio/io/ebisoap.rb +158 -0
  159. data/lib/bio/io/ensembl.rb +229 -0
  160. data/lib/bio/io/fastacmd.rb +163 -0
  161. data/lib/bio/io/fetch.rb +195 -0
  162. data/lib/bio/io/flatfile.rb +482 -0
  163. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  164. data/lib/bio/io/flatfile/bdb.rb +253 -0
  165. data/lib/bio/io/flatfile/buffer.rb +237 -0
  166. data/lib/bio/io/flatfile/index.rb +1381 -0
  167. data/lib/bio/io/flatfile/indexer.rb +805 -0
  168. data/lib/bio/io/flatfile/splitter.rb +297 -0
  169. data/lib/bio/io/higet.rb +73 -0
  170. data/lib/bio/io/hinv.rb +442 -0
  171. data/lib/bio/io/keggapi.rb +805 -0
  172. data/lib/bio/io/ncbirest.rb +733 -0
  173. data/lib/bio/io/ncbisoap.rb +155 -0
  174. data/lib/bio/io/pubmed.rb +307 -0
  175. data/lib/bio/io/registry.rb +292 -0
  176. data/lib/bio/io/soapwsdl.rb +119 -0
  177. data/lib/bio/io/sql.rb +186 -0
  178. data/lib/bio/location.rb +867 -0
  179. data/lib/bio/map.rb +410 -0
  180. data/lib/bio/pathway.rb +960 -0
  181. data/lib/bio/reference.rb +602 -0
  182. data/lib/bio/sequence.rb +456 -0
  183. data/lib/bio/sequence/aa.rb +152 -0
  184. data/lib/bio/sequence/adapter.rb +108 -0
  185. data/lib/bio/sequence/common.rb +310 -0
  186. data/lib/bio/sequence/compat.rb +123 -0
  187. data/lib/bio/sequence/dblink.rb +54 -0
  188. data/lib/bio/sequence/format.rb +358 -0
  189. data/lib/bio/sequence/format_raw.rb +23 -0
  190. data/lib/bio/sequence/generic.rb +24 -0
  191. data/lib/bio/sequence/na.rb +491 -0
  192. data/lib/bio/shell.rb +44 -0
  193. data/lib/bio/shell/core.rb +578 -0
  194. data/lib/bio/shell/demo.rb +146 -0
  195. data/lib/bio/shell/interface.rb +218 -0
  196. data/lib/bio/shell/irb.rb +95 -0
  197. data/lib/bio/shell/object.rb +71 -0
  198. data/lib/bio/shell/plugin/blast.rb +42 -0
  199. data/lib/bio/shell/plugin/codon.rb +218 -0
  200. data/lib/bio/shell/plugin/das.rb +58 -0
  201. data/lib/bio/shell/plugin/emboss.rb +23 -0
  202. data/lib/bio/shell/plugin/entry.rb +105 -0
  203. data/lib/bio/shell/plugin/flatfile.rb +101 -0
  204. data/lib/bio/shell/plugin/keggapi.rb +181 -0
  205. data/lib/bio/shell/plugin/midi.rb +430 -0
  206. data/lib/bio/shell/plugin/obda.rb +45 -0
  207. data/lib/bio/shell/plugin/psort.rb +56 -0
  208. data/lib/bio/shell/plugin/seq.rb +247 -0
  209. data/lib/bio/shell/plugin/soap.rb +87 -0
  210. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +29 -0
  211. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +4 -0
  212. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +27 -0
  213. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +11 -0
  214. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +4 -0
  215. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +7 -0
  216. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
  217. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
  218. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
  219. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +368 -0
  220. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +47 -0
  221. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +144 -0
  222. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +47 -0
  223. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +8 -0
  224. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +10 -0
  225. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +26 -0
  226. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
  227. data/lib/bio/shell/script.rb +25 -0
  228. data/lib/bio/shell/setup.rb +109 -0
  229. data/lib/bio/shell/web.rb +102 -0
  230. data/lib/bio/tree.rb +852 -0
  231. data/lib/bio/util/color_scheme.rb +191 -0
  232. data/lib/bio/util/color_scheme/buried.rb +59 -0
  233. data/lib/bio/util/color_scheme/helix.rb +59 -0
  234. data/lib/bio/util/color_scheme/hydropathy.rb +64 -0
  235. data/lib/bio/util/color_scheme/nucleotide.rb +31 -0
  236. data/lib/bio/util/color_scheme/strand.rb +59 -0
  237. data/lib/bio/util/color_scheme/taylor.rb +50 -0
  238. data/lib/bio/util/color_scheme/turn.rb +59 -0
  239. data/lib/bio/util/color_scheme/zappo.rb +50 -0
  240. data/lib/bio/util/contingency_table.rb +370 -0
  241. data/lib/bio/util/restriction_enzyme.rb +228 -0
  242. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  243. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  244. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  245. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  246. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  247. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  248. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  249. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  250. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  251. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  252. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  253. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  254. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  255. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  256. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  257. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  258. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  259. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  260. data/lib/bio/util/restriction_enzyme/single_strand.rb +200 -0
  261. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  262. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  263. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  264. data/lib/bio/util/sirna.rb +288 -0
  265. data/test/data/HMMER/hmmpfam.out +64 -0
  266. data/test/data/HMMER/hmmsearch.out +88 -0
  267. data/test/data/SOSUI/sample.report +11 -0
  268. data/test/data/TMHMM/sample.report +21 -0
  269. data/test/data/aaindex/DAYM780301 +30 -0
  270. data/test/data/aaindex/PRAM900102 +20 -0
  271. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  272. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  273. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  274. data/test/data/blast/b0002.faa +15 -0
  275. data/test/data/blast/b0002.faa.m0 +128 -0
  276. data/test/data/blast/b0002.faa.m7 +65 -0
  277. data/test/data/blast/b0002.faa.m8 +1 -0
  278. data/test/data/blast/blastp-multi.m7 +188 -0
  279. data/test/data/command/echoarg2.bat +1 -0
  280. data/test/data/embl/AB090716.embl +65 -0
  281. data/test/data/embl/AB090716.embl.rel89 +63 -0
  282. data/test/data/fasta/example1.txt +75 -0
  283. data/test/data/fasta/example2.txt +21 -0
  284. data/test/data/genscan/sample.report +63 -0
  285. data/test/data/iprscan/merged.raw +32 -0
  286. data/test/data/iprscan/merged.txt +74 -0
  287. data/test/data/paml/codeml/control_file.txt +30 -0
  288. data/test/data/paml/codeml/output.txt +78 -0
  289. data/test/data/paml/codeml/rates +217 -0
  290. data/test/data/prosite/prosite.dat +2233 -0
  291. data/test/data/refseq/nm_126355.entret +64 -0
  292. data/test/data/rpsblast/misc.rpsblast +193 -0
  293. data/test/data/soft/GDS100_partial.soft +92 -0
  294. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  295. data/test/data/uniprot/p53_human.uniprot +1456 -0
  296. data/test/functional/bio/appl/test_pts1.rb +115 -0
  297. data/test/functional/bio/io/test_ensembl.rb +229 -0
  298. data/test/functional/bio/io/test_soapwsdl.rb +52 -0
  299. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  300. data/test/functional/bio/test_command.rb +301 -0
  301. data/test/runner.rb +14 -0
  302. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  303. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  304. data/test/unit/bio/appl/blast/test_report.rb +1135 -0
  305. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  306. data/test/unit/bio/appl/genscan/test_report.rb +182 -0
  307. data/test/unit/bio/appl/hmmer/test_report.rb +342 -0
  308. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  309. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  310. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  311. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  312. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  313. data/test/unit/bio/appl/sosui/test_report.rb +81 -0
  314. data/test/unit/bio/appl/targetp/test_report.rb +146 -0
  315. data/test/unit/bio/appl/test_blast.rb +277 -0
  316. data/test/unit/bio/appl/test_fasta.rb +130 -0
  317. data/test/unit/bio/appl/test_psort.rb +57 -0
  318. data/test/unit/bio/appl/test_pts1.rb +77 -0
  319. data/test/unit/bio/appl/tmhmm/test_report.rb +126 -0
  320. data/test/unit/bio/data/test_aa.rb +90 -0
  321. data/test/unit/bio/data/test_codontable.rb +107 -0
  322. data/test/unit/bio/data/test_na.rb +80 -0
  323. data/test/unit/bio/db/embl/test_common.rb +117 -0
  324. data/test/unit/bio/db/embl/test_embl.rb +214 -0
  325. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  326. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  327. data/test/unit/bio/db/embl/test_sptr.rb +1812 -0
  328. data/test/unit/bio/db/embl/test_uniprot.rb +31 -0
  329. data/test/unit/bio/db/kegg/test_genes.rb +45 -0
  330. data/test/unit/bio/db/pdb/test_pdb.rb +152 -0
  331. data/test/unit/bio/db/test_aaindex.rb +197 -0
  332. data/test/unit/bio/db/test_fasta.rb +250 -0
  333. data/test/unit/bio/db/test_gff.rb +1190 -0
  334. data/test/unit/bio/db/test_lasergene.rb +95 -0
  335. data/test/unit/bio/db/test_medline.rb +127 -0
  336. data/test/unit/bio/db/test_newick.rb +293 -0
  337. data/test/unit/bio/db/test_nexus.rb +364 -0
  338. data/test/unit/bio/db/test_prosite.rb +1437 -0
  339. data/test/unit/bio/db/test_rebase.rb +101 -0
  340. data/test/unit/bio/db/test_soft.rb +138 -0
  341. data/test/unit/bio/db/test_url.rb +36 -0
  342. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  343. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  344. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  345. data/test/unit/bio/io/test_ddbjxml.rb +80 -0
  346. data/test/unit/bio/io/test_ensembl.rb +109 -0
  347. data/test/unit/bio/io/test_fastacmd.rb +42 -0
  348. data/test/unit/bio/io/test_flatfile.rb +505 -0
  349. data/test/unit/bio/io/test_soapwsdl.rb +32 -0
  350. data/test/unit/bio/sequence/test_aa.rb +115 -0
  351. data/test/unit/bio/sequence/test_common.rb +373 -0
  352. data/test/unit/bio/sequence/test_compat.rb +69 -0
  353. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  354. data/test/unit/bio/sequence/test_na.rb +330 -0
  355. data/test/unit/bio/shell/plugin/test_seq.rb +185 -0
  356. data/test/unit/bio/test_alignment.rb +1025 -0
  357. data/test/unit/bio/test_command.rb +349 -0
  358. data/test/unit/bio/test_db.rb +96 -0
  359. data/test/unit/bio/test_feature.rb +144 -0
  360. data/test/unit/bio/test_location.rb +599 -0
  361. data/test/unit/bio/test_map.rb +230 -0
  362. data/test/unit/bio/test_pathway.rb +499 -0
  363. data/test/unit/bio/test_reference.rb +252 -0
  364. data/test/unit/bio/test_sequence.rb +329 -0
  365. data/test/unit/bio/test_shell.rb +18 -0
  366. data/test/unit/bio/test_tree.rb +593 -0
  367. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  368. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  369. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  370. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +101 -0
  371. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  372. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  373. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  374. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  375. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  376. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  377. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  378. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  379. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  380. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  381. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  382. data/test/unit/bio/util/test_color_scheme.rb +33 -0
  383. data/test/unit/bio/util/test_contingency_table.rb +94 -0
  384. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  385. data/test/unit/bio/util/test_sirna.rb +245 -0
  386. metadata +543 -0
@@ -0,0 +1,306 @@
1
+ #
2
+ # = bio.rb - Loading all BioRuby modules
3
+ #
4
+ # Copyright:: Copyright (C) 2001-2007
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ module Bio
12
+
13
+ BIORUBY_VERSION = [1, 3, 0].extend(Comparable)
14
+
15
+ ### Basic data types
16
+
17
+ ## Sequence
18
+
19
+ autoload :Sequence, 'bio/sequence'
20
+ ## below are described in bio/sequence.rb
21
+ #class Sequence
22
+ # autoload :Common, 'bio/sequence/common'
23
+ # autoload :NA, 'bio/sequence/na'
24
+ # autoload :AA, 'bio/sequence/aa'
25
+ # autoload :Generic, 'bio/sequence/generic'
26
+ # autoload :Format, 'bio/sequence/format'
27
+ # autoload :Adapter, 'bio/sequence/adapter'
28
+ #end
29
+
30
+ ## Locations/Location
31
+
32
+ autoload :Location, 'bio/location'
33
+ autoload :Locations, 'bio/location'
34
+
35
+ ## Features/Feature
36
+
37
+ autoload :Feature, 'bio/feature'
38
+ autoload :Features, 'bio/compat/features'
39
+
40
+ ## References/Reference
41
+
42
+ autoload :Reference, 'bio/reference'
43
+ autoload :References, 'bio/compat/references'
44
+
45
+ ## Pathway/Relation
46
+
47
+ autoload :Pathway, 'bio/pathway'
48
+ autoload :Relation, 'bio/pathway'
49
+
50
+ ## Alignment
51
+
52
+ autoload :Alignment, 'bio/alignment'
53
+
54
+ ## Tree
55
+ autoload :Tree, 'bio/tree'
56
+
57
+ ## Map
58
+ autoload :Map, 'bio/map'
59
+
60
+ ### Constants
61
+
62
+ autoload :NucleicAcid, 'bio/data/na'
63
+ autoload :AminoAcid, 'bio/data/aa'
64
+ autoload :CodonTable, 'bio/data/codontable'
65
+
66
+
67
+ ### DB parsers
68
+
69
+ autoload :DB, 'bio/db'
70
+ autoload :NCBIDB, 'bio/db'
71
+ autoload :KEGGDB, 'bio/db'
72
+ autoload :EMBLDB, 'bio/db'
73
+
74
+
75
+ ### URL Generators
76
+
77
+ autoload :URLGenerators, 'bio/db/url'
78
+ autoload :URL, 'bio/db/url'
79
+
80
+ ## GenBank/RefSeq/DDBJ
81
+
82
+ autoload :GenBank, 'bio/db/genbank/genbank'
83
+ autoload :GenPept, 'bio/db/genbank/genpept'
84
+ autoload :RefSeq, 'bio/db/genbank/refseq'
85
+ autoload :DDBJ, 'bio/db/genbank/ddbj'
86
+ ## below are described in bio/db/genbank/ddbj.rb
87
+ #class DDBJ
88
+ # autoload :XML, 'bio/io/ddbjxml'
89
+ #end
90
+
91
+ ## EMBL/TrEMBL/Swiss-Prot/SPTR
92
+
93
+ autoload :EMBL, 'bio/db/embl/embl'
94
+ autoload :SPTR, 'bio/db/embl/sptr'
95
+ autoload :TrEMBL, 'bio/db/embl/trembl'
96
+ autoload :UniProt, 'bio/db/embl/uniprot'
97
+ autoload :SwissProt, 'bio/db/embl/swissprot'
98
+
99
+ ## KEGG
100
+
101
+ class KEGG
102
+ autoload :GENOME, 'bio/db/kegg/genome'
103
+ autoload :GENES, 'bio/db/kegg/genes'
104
+ autoload :ENZYME, 'bio/db/kegg/enzyme'
105
+ autoload :COMPOUND, 'bio/db/kegg/compound'
106
+ autoload :DRUG, 'bio/db/kegg/drug'
107
+ autoload :GLYCAN, 'bio/db/kegg/glycan'
108
+ autoload :REACTION, 'bio/db/kegg/reaction'
109
+ autoload :BRITE, 'bio/db/kegg/brite'
110
+ autoload :CELL, 'bio/db/kegg/cell'
111
+ autoload :EXPRESSION, 'bio/db/kegg/expression'
112
+ autoload :ORTHOLOGY, 'bio/db/kegg/orthology'
113
+ autoload :KGML, 'bio/db/kegg/kgml'
114
+ autoload :Taxonomy, 'bio/db/kegg/taxonomy'
115
+ end
116
+
117
+ ## other formats
118
+
119
+ autoload :FastaFormat, 'bio/db/fasta'
120
+ autoload :FastaNumericFormat, 'bio/db/fasta' # change to FastaFormat::Numeric ?
121
+ autoload :FastaDefline, 'bio/db/fasta' # change to FastaFormat::Defline
122
+ autoload :GFF, 'bio/db/gff'
123
+ autoload :AAindex, 'bio/db/aaindex'
124
+ autoload :AAindex1, 'bio/db/aaindex' # change to AAindex::AAindex1 ?
125
+ autoload :AAindex2, 'bio/db/aaindex' # change to AAindex::AAindex2 ?
126
+ autoload :TRANSFAC, 'bio/db/transfac'
127
+ autoload :PROSITE, 'bio/db/prosite'
128
+ autoload :LITDB, 'bio/db/litdb'
129
+ autoload :MEDLINE, 'bio/db/medline'
130
+ autoload :FANTOM, 'bio/db/fantom'
131
+ autoload :GO, 'bio/db/go'
132
+ autoload :PDB, 'bio/db/pdb'
133
+ autoload :NBRF, 'bio/db/nbrf'
134
+ autoload :REBASE, 'bio/db/rebase'
135
+ autoload :SOFT, 'bio/db/soft'
136
+ autoload :Lasergene, 'bio/db/lasergene'
137
+
138
+ autoload :Newick, 'bio/db/newick'
139
+ autoload :Nexus, 'bio/db/nexus'
140
+
141
+ ### IO interface modules
142
+
143
+ autoload :Registry, 'bio/io/registry'
144
+ autoload :Fetch, 'bio/io/fetch'
145
+ autoload :SQL, 'bio/io/sql'
146
+ autoload :SOAPWSDL, 'bio/io/soapwsdl'
147
+ autoload :FlatFile, 'bio/io/flatfile'
148
+ autoload :FlatFileIndex, 'bio/io/flatfile/index' # change to FlatFile::Index ?
149
+ ## below are described in bio/io/flatfile/index.rb
150
+ #class FlatFileIndex
151
+ # autoload :Indexer, 'bio/io/flatfile/indexer'
152
+ # autoload :BDBdefault, 'bio/io/flatfile/bdb'
153
+ # autoload :BDBwrapper, 'bio/io/flatfile/bdb'
154
+ # autoload :BDB_1, 'bio/io/flatfile/bdb'
155
+ #end
156
+
157
+ autoload :PubMed, 'bio/io/pubmed'
158
+ autoload :DAS, 'bio/io/das'
159
+ autoload :DBGET, 'bio/io/dbget'
160
+
161
+ autoload :Ensembl, 'bio/io/ensembl'
162
+ autoload :Hinv, 'bio/io/hinv'
163
+
164
+ ## below are described in bio/appl/blast.rb
165
+ #class Blast
166
+ # autoload :Fastacmd, 'bio/io/fastacmd'
167
+ #end
168
+
169
+ class KEGG
170
+ autoload :API, 'bio/io/keggapi'
171
+ end
172
+
173
+ ## below are described in bio/db/genbank/ddbj.rb
174
+ #class DDBJ
175
+ # autoload :XML, 'bio/io/ddbjxml'
176
+ #end
177
+
178
+ class HGC
179
+ autoload :HiGet, 'bio/io/higet'
180
+ end
181
+
182
+ class EBI
183
+ autoload :SOAP, 'bio/io/ebisoap'
184
+ end
185
+
186
+ class NCBI
187
+ autoload :SOAP, 'bio/io/ncbisoap'
188
+ autoload :REST, 'bio/io/ncbirest'
189
+ end
190
+
191
+
192
+ ### Applications
193
+
194
+ autoload :Fasta, 'bio/appl/fasta'
195
+ ## below are described in bio/appl/fasta.rb
196
+ #class Fasta
197
+ # autoload :Report, 'bio/appl/fasta/format10'
198
+ #end
199
+
200
+ autoload :Blast, 'bio/appl/blast'
201
+ ## below are described in bio/appl/blast.rb
202
+ #class Blast
203
+ # autoload :Fastacmd, 'bio/io/fastacmd'
204
+ # autoload :Report, 'bio/appl/blast/report'
205
+ # autoload :Default, 'bio/appl/blast/format0'
206
+ # autoload :WU, 'bio/appl/blast/wublast'
207
+ # autoload :Bl2seq, 'bio/appl/bl2seq/report'
208
+ # autoload :RPSBlast, 'bio/appl/blast/rpsblast'
209
+ # autoload :NCBIOptions, 'bio/appl/blast/ncbioptions'
210
+ # autoload :Remote, 'bio/appl/blast/remote'
211
+ #end
212
+
213
+ autoload :HMMER, 'bio/appl/hmmer'
214
+ ## below are described in bio/appl/hmmer.rb
215
+ #class HMMER
216
+ # autoload :Report, 'bio/appl/hmmer/report'
217
+ #end
218
+
219
+ autoload :EMBOSS, 'bio/appl/emboss' # use bio/command, improve
220
+
221
+ autoload :PSORT, 'bio/appl/psort'
222
+ ## below are described in bio/appl/psort.rb
223
+ #class PSORT
224
+ # class PSORT1
225
+ # autoload :Report, 'bio/appl/psort/report'
226
+ # end
227
+ # class PSORT2
228
+ # autoload :Report, 'bio/appl/psort/report'
229
+ # end
230
+ #end
231
+
232
+ autoload :TMHMM, 'bio/appl/tmhmm/report'
233
+ autoload :TargetP, 'bio/appl/targetp/report'
234
+ autoload :SOSUI, 'bio/appl/sosui/report'
235
+ autoload :Genscan, 'bio/appl/genscan/report'
236
+
237
+ autoload :ClustalW, 'bio/appl/clustalw'
238
+ ## below are described in bio/appl/clustalw.rb
239
+ #class ClustalW
240
+ # autoload :Report, 'bio/appl/clustalw/report'
241
+ #end
242
+
243
+ autoload :MAFFT, 'bio/appl/mafft'
244
+ ## below are described in bio/appl/mafft.rb
245
+ #class MAFFT
246
+ # autoload :Report, 'bio/appl/mafft/report'
247
+ #end
248
+
249
+ autoload :Tcoffee, 'bio/appl/tcoffee'
250
+ autoload :Muscle, 'bio/appl/muscle'
251
+ autoload :Probcons, 'bio/appl/probcons'
252
+
253
+ autoload :Sim4, 'bio/appl/sim4'
254
+ ## below are described in bio/appl/sim4.rb
255
+ #class Sim4
256
+ # autoload :Report, 'bio/appl/sim4/report'
257
+ #end
258
+
259
+ autoload :Spidey, 'bio/appl/spidey/report'
260
+ autoload :Blat, 'bio/appl/blat/report'
261
+
262
+ module GCG
263
+ autoload :Msf, 'bio/appl/gcg/msf'
264
+ autoload :Seq, 'bio/appl/gcg/seq'
265
+ end
266
+
267
+ module Phylip
268
+ autoload :PhylipFormat, 'bio/appl/phylip/alignment'
269
+ autoload :DistanceMatrix, 'bio/appl/phylip/distance_matrix'
270
+ end
271
+
272
+ autoload :Iprscan, 'bio/appl/iprscan/report'
273
+
274
+ autoload :PAML, 'bio/appl/paml/common'
275
+ ## below are described in bio/appl/paml/common.rb
276
+ # module PAML
277
+ # autoload :Codeml, 'bio/appl/paml/codeml'
278
+ # autoload :Baseml, 'bio/appl/paml/baseml'
279
+ # autoload :Yn00, 'bio/appl/paml/yn00'
280
+ # end
281
+
282
+ ### Utilities
283
+
284
+ autoload :SiRNA, 'bio/util/sirna'
285
+ autoload :ColorScheme, 'bio/util/color_scheme'
286
+ autoload :ContingencyTable, 'bio/util/contingency_table'
287
+ autoload :RestrictionEnzyme, 'bio/util/restriction_enzyme'
288
+
289
+ ### Service libraries
290
+ autoload :Command, 'bio/command'
291
+
292
+ ### Provide BioRuby shell 'command' also as 'Bio.command' (like ChemRuby)
293
+
294
+ def self.method_missing(*args)
295
+ require 'bio/shell'
296
+ extend Bio::Shell
297
+ public_class_method(*Bio::Shell.private_instance_methods)
298
+ if Bio.respond_to?(args.first)
299
+ Bio.send(*args)
300
+ else
301
+ raise NameError
302
+ end
303
+ end
304
+
305
+ end
306
+
@@ -0,0 +1,2518 @@
1
+ #
2
+ # = bio/alignment.rb - multiple alignment of sequences
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2005, 2006
5
+ # GOTO Naohisa <ng@bioruby.org>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id: alignment.rb,v 1.24 2007/12/26 14:08:02 ngoto Exp $
10
+ #
11
+ # = About Bio::Alignment
12
+ #
13
+ # Please refer document of Bio::Alignment module.
14
+ #
15
+ # = References
16
+ #
17
+ # * Bio::Align::AlignI class of the BioPerl.
18
+ # http://doc.bioperl.org/releases/bioperl-1.4/Bio/Align/AlignI.html
19
+ #
20
+ # * Bio::SimpleAlign class of the BioPerl.
21
+ # http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
22
+ #
23
+
24
+ require 'tempfile'
25
+ require 'bio/command'
26
+ require 'bio/sequence'
27
+
28
+ #---
29
+ # (depends on autoload)
30
+ #require 'bio/appl/gcg/seq'
31
+ #+++
32
+
33
+ module Bio
34
+
35
+ #
36
+ # = About Bio::Alignment
37
+ #
38
+ # Bio::Alignment is a namespace of classes/modules for multiple sequence
39
+ # alignment.
40
+ #
41
+ # = Multiple alignment container classes
42
+ #
43
+ # == Bio::Alignment::OriginalAlignment
44
+ #
45
+ # == Bio::Alignment::SequenceArray
46
+ #
47
+ # == Bio::Alignment::SequenceHash
48
+ #
49
+ # = Bio::Alignment::Site
50
+ #
51
+ # = Modules
52
+ #
53
+ # == Bio::Alignment::EnumerableExtension
54
+ #
55
+ # Mix-in for classes included Enumerable.
56
+ #
57
+ # == Bio::Alignment::ArrayExtension
58
+ #
59
+ # Mix-in for Array or Array-like classes.
60
+ #
61
+ # == Bio::Alignment::HashExtension
62
+ #
63
+ # Mix-in for Hash or Hash-like classes.
64
+ #
65
+ # == Bio::Alignment::SiteMethods
66
+ #
67
+ # == Bio::Alignment::PropertyMethods
68
+ #
69
+ # = Bio::Alignment::GAP
70
+ #
71
+ # = Compatibility from older BioRuby
72
+ #
73
+ module Alignment
74
+
75
+ autoload :MultiFastaFormat, 'bio/appl/mafft/report'
76
+
77
+ # Bio::Alignment::PropertyMethods is a set of methods to treat
78
+ # the gap character and so on.
79
module PropertyMethods
  # Default regular expression for detecting gaps
  # (any non-alphabetic character).
  GAP_REGEXP = /[^a-zA-Z]/
  # Default gap character.
  GAP_CHAR = '-'.freeze
  # Default character for a missing or unknown site.
  MISSING_CHAR = '?'.freeze

  # Per-object overrides for the gap regexp, the gap character, the
  # missing character and the sequence class.
  # The sequence class must be String or its derivatives.
  attr_writer :gap_regexp, :gap_char, :missing_char, :seqclass

  # Returns true if the given string matches the gap regexp,
  # false otherwise.
  # Note that <em>s</em> must be a String which contains a single character.
  def is_gap?(s)
    !!(gap_regexp =~ s)
  end

  # Regular expression for checking gaps: the value set through
  # <tt>gap_regexp=</tt>, or GAP_REGEXP when unset (or set to nil/false).
  def gap_regexp
    (defined?(@gap_regexp) && @gap_regexp) || GAP_REGEXP
  end

  # Gap character: the value set through <tt>gap_char=</tt>,
  # or GAP_CHAR when unset (or set to nil/false).
  def gap_char
    (defined?(@gap_char) && @gap_char) || GAP_CHAR
  end

  # Character used when a site is missing or unknown: the value set
  # through <tt>missing_char=</tt>, or MISSING_CHAR when unset.
  def missing_char
    (defined?(@missing_char) && @missing_char) || MISSING_CHAR
  end

  # Class of the sequence: the value set through <tt>seqclass=</tt>,
  # or String when unset (or set to nil/false).
  def seqclass
    (defined?(@seqclass) && @seqclass) || String
  end

  # Returns the properties explicitly set on the object as a hash.
  # Only properties whose instance variable is defined are included
  # (even if the stored value is nil).
  def get_all_property
    [ :gap_regexp, :gap_char, :missing_char, :seqclass ].inject({}) do |props, key|
      ivar = "@#{key}"
      props[key] = instance_variable_get(ivar) if instance_variable_defined?(ivar)
      props
    end
  end

  # Sets properties from the given hash and returns self.
  # <em>hash</em> would be a return value of <tt>get_all_property</tt>;
  # keys absent from the hash leave the current setting untouched.
  def set_all_property(hash)
    [ :gap_regexp, :gap_char, :missing_char, :seqclass ].each do |key|
      instance_variable_set("@#{key}", hash[key]) if hash.has_key?(key)
    end
    self
  end
end #module PropertyMethods
156
+
157
+ # Bio::Alignment::SiteMethods is a set of methods for
158
+ # Bio::Alignment::Site.
159
+ # It can also be used for extending an array of single-letter strings.
160
+ module SiteMethods
161
+ include PropertyMethods
162
+
163
+ # If there are gaps, returns true. Otherwise, returns false.
164
def has_gap?
  # true as soon as one element of the site is recognised as a gap
  any? { |x| is_gap?(x) }
end
167
+
168
+ # Removes gaps in the site. (destructive method)
169
def remove_gaps!
  # reject! returns nil when no gap was found, self otherwise
  removed = reject! { |x| is_gap?(x) }
  # nil entries are dropped as well
  compact!
  removed ? self : nil
end
177
+
178
+ # Returns consensus character of the site.
179
+ # If consensus is found, returns a single-letter string.
180
+ # If not, returns nil.
181
# threshold:: minimum fraction of the site (0.0 .. 1.0 expected) that the
#             most frequent character must reach to be the consensus.
#
# Returns the most frequent element when it reaches the threshold; ties
# are resolved in favour of the element that appears first in the site.
# Returns nil for an empty site or when the threshold is not reached.
def consensus_string(threshold = 1.0)
  return nil if size <= 0
  # uniq alone is enough to detect a uniform site; no sort is needed
  return self[0] if uniq.size == 1
  counts = Hash.new(0)
  each { |x| counts[x] += 1 }
  best = nil
  best_count = 0
  # scanning in site order with a strict comparison keeps the first
  # occurrence among equally frequent characters
  each do |x|
    n = counts[x]
    if n > best_count then
      best = x
      best_count = n
    end
  end
  (size * threshold <= best_count) ? best : nil
end
198
+
199
+ # IUPAC nucleotide groups. Internal use only.
200
IUPAC_NUC = [
  %w( t u ),
  %w( m a c ),
  %w( r a g ),
  %w( w a t u ),
  %w( s c g ),
  %w( y c t u ),
  %w( k g t u ),
  %w( v a c g m r s ),
  %w( h a c t u m w y ),
  %w( d a g t u r w k ),
  %w( b c g t u s y k ),
  %w( n a c g t u m r w s y k v h d b )
]

# Returns an IUPAC consensus base for the site.
# If consensus is found, returns a single-letter string.
# If not, returns nil.
def consensus_iupac
  bases = self.collect { |x| x.downcase }.sort.uniq
  unless bases.size == 1
    # first group (the table is ordered from specific to general)
    # that covers every distinct base of the site
    group = IUPAC_NUC.find { |g| (bases - g).empty? }
    return(group ? group[0] : nil)
  end
  base = bases[0]
  return base if %w( a c g t ).include?(base)
  # uracil is reported as thymine
  return 't' if 'u' == base
  # a lone ambiguity code is kept as is; anything else has no consensus
  IUPAC_NUC.find { |g| base == g[0] } ? base : nil
end
235
+
236
+ # Table of strongly conserved amino-acid groups.
237
+ #
238
+ # The value of the tables are taken from BioPerl
239
+ # (Bio/SimpleAlign.pm in BioPerl 1.0),
240
+ # and the BioPerl's document says that
241
+ # it is taken from Clustalw documentation and
242
+ # These are all the positively scoring groups that occur in the
243
+ # Gonnet Pam250 matrix. The strong and weak groups are
244
+ # defined as strong score >0.5 and weak score =<0.5 respectively.
245
+ #
246
StrongConservationGroups = %w(STA NEQK NHQK NDEQ QHRK MILV MILF
  HY FYW).collect { |x| x.split('').sort }

# Table of weakly conserved amino-acid groups.
#
# Please refer to the StrongConservationGroups document
# for the origin of the table.
WeakConservationGroups = %w(CSA ATV SAG STNK STPA SGND SNDEQK
  NDEQHK NEQHRK FVLIM HFY).collect { |x| x.split('').sort }

# Returns the match-line character for the site.
# This is the amino-acid version.
#
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:strong_match_char] ==> strong match  default: ':'
#   opt[:weak_match_char]   ==> weak match    default: '.'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
def match_line_amino(opt = {})
  identical = opt[:match_line_char] || '*'
  strong = opt[:strong_match_char] || ':'
  weak = opt[:weak_match_char] || '.'
  mismatch = opt[:mismatch_char] || ' '
  residues = self.collect { |c| c.upcase }.sort.uniq
  # NOTE: the temporary array is extended without copying properties,
  # so its gap check uses the default gap settings.
  residues.extend(SiteMethods)
  return mismatch if residues.has_gap?
  return identical if residues.size == 1
  return strong if StrongConservationGroups.find { |g| (residues - g).empty? }
  return weak if WeakConservationGroups.find { |g| (residues - g).empty? }
  mismatch
end
281
+
282
+ # Returns the match-line character for the site.
283
+ # This is nucleic-acid version.
284
def match_line_nuc(opt = {})
  # opt[:match_line_char] ==> 100% equal  default: '*'
  # opt[:mismatch_char]   ==> mismatch    default: ' '
  identical = opt[:match_line_char] || '*'
  mismatch = opt[:mismatch_char] || ' '
  bases = self.collect { |c| c.upcase }.sort.uniq
  # NOTE: the temporary array is extended without copying properties,
  # so its gap check uses the default gap settings.
  bases.extend(SiteMethods)
  # a match needs a gap-free site made of one distinct base
  (!bases.has_gap? && bases.size == 1) ? identical : mismatch
end
299
+ end #module SiteMethods
300
+
301
+ # Bio::Alignment::Site stores bases or amino-acids in a
302
+ # site of the alignment.
303
+ # It would store multiple String objects of length 1.
304
+ # Please refer to the document of Array and SiteMethods for methods.
305
+ class Site < Array
306
+ include SiteMethods
307
+ end #class Site
308
+
309
+ # The module Bio::Alignment::EnumerableExtension is a set of useful
310
+ # methods for multiple sequence alignment.
311
+ # It can be included by any classes or can be extended to any objects.
312
+ # The classes or objects must have methods defined in Enumerable,
313
+ # and must have the <tt>each</tt> method
314
+ # which iterates over each sequence (or string) and yields
315
+ # a sequence (or string) object.
316
+ #
317
+ # Optionally, if <tt>each_seq</tt> method is defined,
318
+ # which iterates over each sequence (or string) and yields
319
+ # each sequence (or string) object, it is used instead of <tt>each</tt>.
320
+ #
321
+ # Note that the <tt>each</tt> or <tt>each_seq</tt> method would be
322
+ # called multiple times.
323
+ # This means that the module is not suitable for IO objects.
324
+ # In addition, <tt>break</tt> would be used in the given block and
325
+ # destructive methods would be used to the sequences.
326
+ #
327
+ # For Array or Hash objects, you'd better using
328
+ # ArrayExtension or HashExtension modules, respectively.
329
+ # They would have built-in <tt>each_seq</tt> method and/or
330
+ # some methods would be redefined.
331
+ #
332
+ module EnumerableExtension
333
+ include PropertyMethods
334
+
335
+ # Iterates over each sequences.
336
+ # Yields a sequence.
337
+ # It acts the same as Enumerable#each.
338
+ #
339
+ # You would redefine the method suitable for the class/object.
340
+ def each_seq(&block) #:yields: seq
341
+ # Default implementation: delegates to each, passing the given block on.
+ each(&block)
342
+ end
343
+
344
+ # Returns class of the sequence.
345
+ # If instance variable @seqclass (which can be
346
+ # set by 'seqclass=' method) is set, simply returns the value.
347
+ # Otherwise, returns the first sequence's class.
348
+ # If no sequences are found, returns String.
349
def seqclass
  # an explicitly configured class always wins
  return @seqclass if (defined? @seqclass) and @seqclass
  found = nil
  each_seq do |s|
    next unless s
    # class of the first non-nil sequence
    found = s.class
    break
  end
  found || String
end
363
+
364
+ # Returns the alignment length.
365
+ # Returns the longest length of the sequence in the alignment.
366
def alignment_length
  longest = 0
  each_seq do |seq|
    n = seq.length
    longest = n unless n <= longest
  end
  longest
end
alias seq_length alignment_length
375
+
376
+ # Gets a site of the position.
377
+ # Returns a Bio::Alignment::Site object.
378
+ #
379
+ # If the position is out of range, it returns the site
380
+ # of which all are gaps.
381
+ #
382
+ # It is a private method.
383
+ # Only difference from public alignment_site method is
384
+ # it does not do <tt>set_all_property(get_all_property)</tt>.
385
# position:: zero-based column index
#
# Returns a Site holding, for each sequence, the one-character slice at
# the position, or a gap (an instance of seqclass built from gap_char)
# when the sequence has nothing there.
def _alignment_site(position)
  site = Site.new
  filler_class = nil
  each_seq do |s|
    c = s[position, 1]
    if c.to_s.empty?
      # seqclass may scan the sequences, so look it up at most once
      # per site instead of once per padded sequence
      filler_class ||= seqclass
      c = filler_class.new(gap_char)
    end
    site << c
  end
  site
end
private :_alignment_site
397
+
398
+ # Gets a site of the position.
399
+ # Returns a Bio::Alignment::Site object.
400
+ #
401
+ # If the position is out of range, it returns the site
402
+ # of which all are gaps.
403
def alignment_site(position)
  column = _alignment_site(position)
  # hand the alignment's gap/missing/seqclass settings to the site
  column.set_all_property(get_all_property)
  column
end
408
+
409
+ # Iterates over each site of the alignment.
410
+ # It yields a Bio::Alignment::Site object (which inherits Array).
411
+ # It returns self.
412
def each_site
  # properties are read once and shared by every yielded site
  props = get_all_property
  alignment_length.times do |pos|
    column = _alignment_site(pos)
    column.set_all_property(props)
    yield(column)
  end
  self
end
421
+
422
+ # Iterates over each site of the alignment, with specifying
423
+ # start, stop positions and step.
424
+ # It yields Bio::Alignment::Site object (which inherits Array).
425
+ # It returns self.
426
+ # It is same as
427
+ # <tt>start.step(stop, step) { |i| yield alignment_site(i) }</tt>.
428
def each_site_step(start, stop, step = 1)
  # properties are read once and shared by every yielded site
  props = get_all_property
  start.step(stop, step) { |pos|
    column = _alignment_site(pos)
    column.set_all_property(props)
    yield(column)
  }
  self
end
437
+
438
+ # Iterates over each sequence and results running blocks
439
+ # are collected and returns a new alignment as a
440
+ # Bio::Alignment::SequenceArray object.
441
+ #
442
+ # Note that it would be redefined if you want to change
443
+ # return value's class.
444
+ #
445
def alignment_collect
  result = SequenceArray.new
  # the new alignment inherits this alignment's properties
  result.set_all_property(get_all_property)
  each_seq { |seq| result << yield(seq) }
  result
end
453
+
454
+ # Returns specified range of the alignment.
455
+ # For each sequence, the '[]' method (it may be String#[])
456
+ # is executed, and returns a new alignment
457
+ # as a Bio::Alignment::SequenceArray object.
458
+ #
459
+ # Unlike alignment_slice method, the result alignment are
460
+ # guaranteed to contain String object if the range specified
461
+ # is out of range.
462
+ #
463
+ # If you want to change return value's class, you should redefine
464
+ # alignment_collect method.
465
+ #
466
def alignment_window(*arg)
  # an out-of-range slice (nil) is replaced by an empty sequence
  alignment_collect { |seq| seq[*arg] || seqclass.new('') }
end
alias window alignment_window
472
+
473
+ # Iterates over each sliding window of the alignment.
474
+ # window_size is the size of sliding window.
475
+ # step is the step of each sliding.
476
+ # It yields a Bio::Alignment::SequenceArray object which contains
477
+ # each sliding window.
478
+ # It returns a Bio::Alignment::SequenceArray object which contains
479
+ # remainder alignment at the terminal end.
480
+ # If window_size is smaller than 0, it returns nil.
481
# window_size:: width of each window; nil is returned if negative
# step_size:: distance between window starts; a negative value walks
#             from the tail towards the head
#
# Yields each window (the result of alignment_window) and returns the
# part of the alignment not covered by the last window. When the
# alignment is shorter than the window nothing is yielded and the whole
# alignment is returned as the remainder.
#
# Raises ArgumentError if step_size is 0.
def each_window(window_size, step_size = 1)
  return nil if window_size < 0
  raise ArgumentError, "step_size can't be 0" if step_size == 0
  last = nil
  if step_size > 0 then
    0.step(alignment_length - window_size, step_size) do |i|
      yield alignment_window(i, window_size)
      last = i
    end
    # no window fitted: everything is remainder
    return alignment_window(0..-1) unless last
    alignment_window((last + window_size)..-1)
  else
    i = alignment_length - window_size
    while i >= 0
      yield alignment_window(i, window_size)
      last = i
      i += step_size
    end
    # no window fitted: everything is remainder
    return alignment_window(0..-1) unless last
    alignment_window(0...last)
  end
end
499
+
500
+ # Iterates over each site of the alignment and results running the
501
+ # block are collected and returns an array.
502
+ # It yields a Bio::Alignment::Site object.
503
def collect_each_site
  results = []
  each_site { |site| results << yield(site) }
  results
end
510
+
511
+ # Helper method for calculating consensus sequence.
512
+ # It iterates over each site of the alignment.
513
+ # In each site, gaps will be removed if specified with opt.
514
+ # It yields a Bio::Alignment::Site object.
515
+ # Results running the block (String objects are expected)
516
+ # are joined to a string and it returns the string.
517
+ #
518
+ # opt[:gap_mode] ==> 0 -- gaps are regarded as normal characters
519
+ # 1 -- a site within gaps is regarded as a gap
520
+ # -1 -- gaps are eliminated from consensus calculation
521
+ # default: 0
522
+ #
523
def consensus_each_site(opt = {})
  missing = (opt[:missing_char] or self.missing_char)
  columns =
    case opt[:gap_mode]
    when 0, nil
      # gaps are ordinary characters
      collect_each_site { |site| yield(site) || missing }
    when 1
      # any gap turns the whole site into a gap
      collect_each_site do |site|
        site.has_gap? ? gap_char : (yield(site) || missing)
      end
    when -1
      # gaps are dropped before the block sees the site
      collect_each_site do |site|
        site.remove_gaps!
        site.empty? ? gap_char : (yield(site) || missing)
      end
    else
      raise ':gap_mode must be 0, 1 or -1'
    end
  columns.join('')
end
544
+
545
+ # Returns the consensus string of the alignment.
546
+ # 0.0 <= threshold <= 1.0 is expected.
547
+ #
548
+ # It resembles the BioPerl's AlignI::consensus_string method.
549
+ #
550
+ # Please refer to the consensus_each_site method for opt.
551
+ #
552
def consensus_string(threshold = 1.0, opt = {})
  # each site computes its own consensus character
  consensus_each_site(opt) { |site| site.consensus_string(threshold) }
end
557
+
558
+ # Returns the IUPAC consensus string of the alignment
559
+ # of nucleic-acid sequences.
560
+ #
561
+ # It resembles the BioPerl's AlignI::consensus_iupac method.
562
+ #
563
+ # Please refer to the consensus_each_site method for opt.
564
+ #
565
def consensus_iupac(opt = {})
  # each site computes its own IUPAC consensus base
  consensus_each_site(opt) { |site| site.consensus_iupac }
end
570
+
571
+ # Returns the match line string of the alignment
572
+ # of amino-acid sequences.
573
+ #
574
+ # It resembles the BioPerl's AlignI::match_line method.
575
+ #
576
+ # opt[:match_line_char] ==> 100% equal default: '*'
577
+ # opt[:strong_match_char] ==> strong match default: ':'
578
+ # opt[:weak_match_char] ==> weak match default: '.'
579
+ # opt[:mismatch_char] ==> mismatch default: ' '
580
+ #
581
+ # More opt can be accepted.
582
+ # Please refer to the consensus_each_site method for opt.
583
+ #
584
def match_line_amino(opt = {})
  # one match-line character per site, joined into a single string
  collect_each_site { |site| site.match_line_amino(opt) }.join('')
end
589
+
590
+ # Returns the match line string of the alignment
591
+ # of nucleic-acid sequences.
592
+ #
593
+ # It resembles the BioPerl's AlignI::match_line method.
594
+ #
595
+ # opt[:match_line_char] ==> 100% equal default: '*'
596
+ # opt[:mismatch_char] ==> mismatch default: ' '
597
+ #
598
+ # More opt can be accepted.
599
+ # Please refer to the consensus_each_site method for opt.
600
+ #
601
def match_line_nuc(opt = {})
  # one match-line character per site, joined into a single string
  collect_each_site { |site| site.match_line_nuc(opt) }.join('')
end
606
+
607
+ # Returns the match line string of the alignment
608
+ # of nucleic- or amino-acid sequences.
609
+ # The type of the sequence is automatically determined
610
+ # or you can specify with opt[:type].
611
+ #
612
+ # It resembles the BioPerl's AlignI::match_line method.
613
+ #
614
+ # opt[:type] ==> :na or :aa (or determined by sequence class)
615
+ # opt[:match_line_char] ==> 100% equal default: '*'
616
+ # opt[:strong_match_char] ==> strong match default: ':'
617
+ # opt[:weak_match_char] ==> weak match default: '.'
618
+ # opt[:mismatch_char] ==> mismatch default: ' '
619
+ # :strong_ and :weak_match_char are used only in amino mode (:aa)
620
+ #
621
+ # More opt can be accepted.
622
+ # Please refer to the consensus_each_site method for opt.
623
+ #
624
def match_line(opt = {})
  amino =
    case opt[:type]
    when :aa
      true
    when :na, :dna, :rna
      false
    else
      if seqclass == Bio::Sequence::AA then
        true
      elsif seqclass == Bio::Sequence::NA then
        false
      else
        # untyped sequences: any of the letters E, F, I, L, P, Q
        # (case-insensitive) makes the alignment count as amino acids
        guess = nil
        self.each_seq do |x|
          if /[EFILPQ]/i =~ x
            guess = true
            break
          end
        end
        guess
      end
    end
  amino ? match_line_amino(opt) : match_line_nuc(opt)
end
651
+
652
+ # This is the BioPerl's AlignI::match like method.
653
+ #
654
+ # Changes second to last sequences' sites to match_char(default: '.')
655
+ # when a site is equal to the first sequence's corresponding site.
656
+ #
657
+ # Note that it is a destructive method.
658
+ #
659
+ # For Hash, please use it carefully because
660
+ # the order of the sequences is inconstant.
661
+ #
662
def convert_match(match_char = '.')
  #(BioPerl) AlignI::match like method
  width = alignment_length
  reference = nil
  each_seq do |seq|
    unless reference then
      # the first sequence is the reference and is left untouched
      reference = seq
      next
    end
    width.times do |i|
      next unless seq[i]
      next unless reference[i] == seq[i]
      # gaps in the reference are never replaced
      next if is_gap?(reference[i..i])
      seq[i..i] = match_char
    end
  end
  self
end
679
+
680
+ # This is the BioPerl's AlignI::unmatch like method.
681
+ #
682
+ # Changes second to last sequences' sites match_char(default: '.')
683
+ # to original sites' characters.
684
+ #
685
+ # Note that it is a destructive method.
686
+ #
687
+ # For Hash, please use it carefully because
688
+ # the order of the sequences is inconstant.
689
+ #
690
def convert_unmatch(match_char = '.')
  #(BioPerl) AlignI::unmatch like method
  width = alignment_length
  reference = nil
  each_seq do |seq|
    unless reference then
      # the first sequence supplies the original characters
      reference = seq
      next
    end
    width.times do |i|
      next unless seq[i..i] == match_char
      seq[i..i] = (reference[i..i] or match_char)
    end
  end
  self
end
707
+
708
+ # Fills gaps to the tail of each sequence if the length of
709
+ # the sequence is shorter than the alignment length.
710
+ #
711
+ # Note that it is a destructive method.
712
def alignment_normalize!
  #(original)
  width = alignment_length
  each_seq do |seq|
    shortfall = width - seq.length
    # pad the tail of shorter sequences with gap characters
    seq << (gap_char * shortfall) if shortfall > 0
  end
  self
end
alias normalize! alignment_normalize!
721
+
722
+ # Removes excess gaps in the tail of the sequences.
723
+ # If removes nothing, returns nil.
724
+ # Otherwise, returns self.
725
+ #
726
+ # Note that it is a destructive method.
727
def alignment_rstrip!
  #(String-like)
  width = alignment_length
  keep = width
  # walk the columns from the tail until one holds a non-gap character
  each_site_step(width - 1, 0, -1) do |site|
    site.remove_gaps!
    break unless site.empty?
    keep -= 1
  end
  return nil if keep >= width
  each_seq { |seq| seq[keep..-1] = '' if seq.length > keep }
  self
end
alias rstrip! alignment_rstrip!
746
+
747
+ # Removes excess gaps in the head of the sequences.
748
+ # If removes nothing, returns nil.
749
+ # Otherwise, returns self.
750
+ #
751
+ # Note that it is a destructive method.
752
def alignment_lstrip!
  #(String-like)
  lead = 0
  # count the leading columns that consist only of gaps
  each_site do |site|
    site.remove_gaps!
    break unless site.empty?
    lead += 1
  end
  return nil if lead <= 0
  each_seq { |seq| seq[0, lead] = '' }
  self
end
alias lstrip! alignment_lstrip!
768
+
769
+ # Removes excess gaps in the sequences.
770
+ # If removes nothing, returns nil.
771
+ # Otherwise, returns self.
772
+ #
773
+ # Note that it is a destructive method.
774
def alignment_strip!
  #(String-like)
  # both ends are always processed, tail first
  right = alignment_rstrip!
  left = alignment_lstrip!
  right || left
end
alias strip! alignment_strip!
781
+
782
+ # Completely removes ALL gaps in the sequences.
783
+ # If removes nothing, returns nil.
784
+ # Otherwise, returns self.
785
+ #
786
+ # Note that it is a destructive method.
787
def remove_all_gaps!
  changed = false
  # gsub! returns nil when the sequence contained no gap
  each_seq { |seq| changed = true if seq.gsub!(gap_regexp, '') }
  changed ? self : nil
end
795
+
796
+ # Returns the specified range of the alignment.
797
+ # For each sequence, the 'slice' method (it may be String#slice,
798
+ # which is the same as String#[]) is executed, and
799
+ # returns a new alignment as a Bio::Alignment::SequenceArray object.
800
+ #
801
+ # Unlike alignment_window method, the result alignment
802
+ # might contain nil.
803
+ #
804
+ # If you want to change return value's class, you should redefine
805
+ # alignment_collect method.
806
+ #
807
def alignment_slice(*arg)
  #(String-like)
  #(BioPerl) AlignI::slice like method
  alignment_collect { |seq| seq.slice(*arg) }
end
alias slice alignment_slice
815
+
816
+ # For each sequence, the 'subseq' method (Bio::Sequence::Common#subseq is
817
+ # expected) is executed, and returns a new alignment as
818
+ # a Bio::Alignment::SequenceArray object.
819
+ #
820
+ # All sequences in the alignment are expected to be kind of
821
+ # Bio::Sequence::NA or Bio::Sequence::AA objects.
822
+ #
823
+ # Unlike alignment_window method, the result alignment
824
+ # might contain nil.
825
+ #
826
+ # If you want to change return value's class, you should redefine
827
+ # alignment_collect method.
828
+ #
829
def alignment_subseq(*arg)
  #(original)
  alignment_collect { |seq| seq.subseq(*arg) }
end
alias subseq alignment_subseq
836
+
837
+ # Concatenates the given alignment.
838
+ # <em>align</em> must have <tt>each_seq</tt>
839
+ # or <tt>each</tt> method.
840
+ #
841
+ # Returns self.
842
+ #
843
+ # Note that it is a destructive method.
844
+ #
845
+ # For Hash, please use it carefully because
846
+ # the order of the sequences is inconstant and
847
+ # key information is completely ignored.
848
+ #
849
# align:: object providing <tt>each_seq</tt> (preferred) or <tt>each</tt>
#
# Appends the n-th sequence of <em>align</em> to the n-th sequence of
# self (destructively). Extra sequences in <em>align</em> and nil
# entries are ignored. Returns self.
def alignment_concat(align)
  targets = []
  each_seq { |s| targets << s }
  # pick the iterator up front rather than rescuing NoMethodError,
  # so genuine errors raised while iterating are not hidden
  iterator = align.respond_to?(:each_seq) ? :each_seq : :each
  i = 0
  align.__send__(iterator) do |seq|
    targets[i].concat(seq) if targets[i] and seq
    i += 1
  end
  self
end
870
+ end #module EnumerableExtension
871
+
872
+ module Output
873
def output(format, *arg)
  # format symbol => method that produces it
  handler = {
    :clustal   => :output_clustal,
    :fasta     => :output_fasta,
    :phylip    => :output_phylip,
    :phylipnon => :output_phylipnon,
    :msf       => :output_msf,
    :molphy    => :output_molphy
  }[format]
  raise "Unknown format: #{format.inspect}" unless handler
  __send__(handler, *arg)
end
891
+
892
+ # Check whether there are same names for ClustalW format.
893
+ #
894
+ # array:: names of the sequences (array of string)
895
+ # len:: length to check (default:30)
896
# array:: names of the sequences (array of string)
# len:: number of leading characters that are compared (default: 30)
#
# Each name is cut at the first whitespace/NUL, truncated to len
# characters, and the characters : ; , ( ) are replaced with '_' before
# comparing. Returns the sorted indexes of all names that collide with
# another name, or false when every name is distinct.
def __clustal_have_same_name?(array, len = 30)
  keys = array.collect do |k|
    # character class: each of : ; , ( ) is replaced individually
    k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/[\:\;\,\(\)]/, '_').to_s
  end
  positions = Hash.new { |h, key| h[key] = [] }
  keys.each_with_index { |key, i| positions[key] << i }
  dupidx = positions.values.select { |idx| idx.size > 1 }.flatten.sort
  dupidx.empty? ? false : dupidx
end
private :__clustal_have_same_name?
924
+
925
+ # Changes sequence names if there are conflicted names
926
+ # for ClustalW format.
927
+ #
928
+ # array:: names of the sequences (array of string)
929
+ # len:: length to check (default:30)
930
def __clustal_avoid_same_name(array, len = 30)
  names = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
  dups = __clustal_have_same_name?(names, len)
  return names unless dups
  # first attempt: in the colliding names, replace whitespace within
  # the first len characters by '_'
  dups.each do |i|
    original = array[i].to_s
    names[i] = original[0, len].to_s.gsub(/\s/, '_') + original[len..-1].to_s
  end
  # last resort: prefix every name with its index
  if __clustal_have_same_name?(names, len) then
    names.each_with_index do |s, i|
      names[i] = "#{i}_#{s}"
    end
  end
  names
end
private :__clustal_avoid_same_name
958
+
959
+ # Generates ClustalW-formatted text
960
+ # seqs:: sequences (must be an alignment object)
961
+ # names:: names of the sequences
962
+ # options:: options
963
# seqs:: sequences (must be an alignment object)
# names:: names of the sequences
# options:: hash of options read by this method:
#           :replace_space, :escape, :split, :avoid_same_name (name
#           clean-up; the last three default to on), :gap_char,
#           :type, :match_line, :case and :seqnos
#
# Returns the ClustalW-formatted text as a String.
# The given names and a caller-supplied :match_line are not modified.
def __clustal_formatter(seqs, names, options = {})
  #(original)
  aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
  len = seqs.seq_length
  sn = names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  if options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sn)
  end

  # long names get a wider name column and a narrower sequence column
  if sn.find { |x| x.length > 10 } then
    seqwidth = 50
    namewidth = 30
  else
    seqwidth = 60
    namewidth = 10
  end
  sep = ' ' * 6
  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] or '-')

  case options[:type].to_s
  when /protein/i, /aa/i
    mopt = { :type => :aa }
  when /na/i
    mopt = { :type => :na }
  else
    mopt = {}
  end
  # work on a copy: the line is padded and wrapped in place below, which
  # must not alter (or fail on) a string handed in by the caller
  mline = (options[:match_line] or seqs.match_line(mopt)).dup

  aseqs = []
  seqs.each_seq do |s|
    aseqs << s.to_s.gsub(seqs.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  aseqs << mline
  aseqs.collect! do |s|
    # names run out before the match line, which gets a blank header
    snx = sn.shift
    head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth] + sep
    s << (gchar * (len - s.length))
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    if options[:seqnos] and snx then
      # append the running count of non-gap characters to each row
      i = 0
      a.each do |x|
        x.chomp!
        l = x.tr(gchar, '').length
        i += l
        x.concat(l > 0 ? " #{i}\n" : "\n")
      end
    end
    a.collect { |x| head + x }
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  lines.times do
    aln << "\n"
    aseqs.each { |a| aln << a.shift }
  end
  aln.join('')
end
private :__clustal_formatter
1040
+
1041
+ # Generates ClustalW-formatted text
1042
+ # The sequences are taken from self (which must be an alignment object).
1043
+ # The names of the sequences are taken from self.sequence_names.
1044
+ # options:: options
1045
def output_clustal(options = {})
  # the alignment itself supplies both the sequences and their names
  names = self.sequence_names
  __clustal_formatter(self, names, options)
end
1048
+
1049
+ # to_clustal is deprecated. Instead, please use output_clustal.
1050
+ #---
1051
+ #alias to_clustal output_clustal
1052
+ #+++
1053
def to_clustal(*arg)
  # deprecated entry point: warn, then delegate unchanged
  warn("to_clustal is deprecated. Please use output_clustal.")
  output_clustal(*arg)
end
1057
+
1058
+ # Generates fasta format text and returns a string.
1059
def output_fasta(options={})
  #(original)
  width = (options[:width] or 70)
  names =
    if options[:avoid_same_name] then
      __clustal_avoid_same_name(self.sequence_names, 30)
    else
      self.sequence_names.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    end
  # sequences are wrapped only for a positive width
  wrap = (width and width > 0) ? Regexp.new(".{1,#{width}}") : nil
  entries = self.collect do |s|
    body = wrap ? s.to_s.gsub(wrap, "\\0\n") : (s.to_s + "\n")
    ">#{names.shift}\n" + body
  end
  entries.join('')
end
1080
+
1081
+ # generates phylip interleaved alignment format as a string
1082
def output_phylip(options = {})
  out, rows, blocks = __output_phylip_common(options)
  # interleave: one row per sequence, then a blank line, per block
  blocks.times do
    rows.each { |row| out << row.shift }
    out << "\n"
  end
  # no blank line after the final block
  out.pop if out.last == "\n"
  out.join('')
end
1091
+
1092
# Generates Phylip3.2 (old) non-interleaved format as a string.
# options:: passed to __output_phylip_common
#
# The header line is followed by all lines of the first sequence,
# then all lines of the second, and so on.
def output_phylipnon(options = {})
  parts = __output_phylip_common(options)
  header_lines = parts[0]
  per_seq_lines = parts[1]
  header_lines.first + per_seq_lines.join('')
end
1097
+
1098
# Common routine for interleaved/non-interleaved phylip format.
# options:: hash; recognized keys are
#   :replace_space   - replace whitespace in names with '_' (off unless set)
#   :escape          - replace : ; , ( ) in names with '_' (on unless set false)
#   :split           - keep only the first whitespace-separated word of
#                      each name (on unless set false)
#   :avoid_same_name - run names through __clustal_avoid_same_name(names, 10)
#                      (on unless set false)
#   :width           - residues per line, rounded down to a multiple of 10
#                      (default 60)
#   :gap_char        - gap character (default '-')
#   :case            - string matching /lower/i or /upper/i changes case
#
# Returns [ header_lines, lines_per_sequence, number_of_lines ], where
# lines_per_sequence holds one array of formatted lines per sequence.
def __output_phylip_common(options = {})
  total = alignment_length
  header = [ " #{number_of_sequences} #{total}\n" ]
  # options that are enabled unless explicitly given a false value
  enabled = lambda { |key| !options.has_key?(key) || options[key] }

  labels = sequence_names.collect { |nm| nm.to_s.gsub(/[\r\n\x00]/, ' ') }
  labels.collect! { |nm| nm.gsub(/\s/, '_') } if options[:replace_space]
  labels.collect! { |nm| nm.gsub(/[\:\;\,\(\)]/, '_') } if enabled.call(:escape)
  labels.collect! { |nm| nm.split(/\s/)[0].to_s } if enabled.call(:split)
  labels = __clustal_avoid_same_name(labels, 10) if enabled.call(:avoid_same_name)

  namewidth = 10
  seqwidth = (options[:width] || 60).div(10) * 10
  # every 10 residues gain one leading space, hence 11 chars per group
  line_regexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gap = options[:gap_char] || '-'

  rows = []
  each_seq { |seq| rows << seq.to_s.gsub(gap_regexp, gap) }
  case options[:case].to_s
  when /lower/i then rows.each { |row| row.downcase! }
  when /upper/i then rows.each { |row| row.upcase! }
  end

  indent = ' ' * namewidth
  rows.collect! do |row|
    label = sprintf("%*s", -namewidth, labels.shift.to_s)[0, namewidth]
    row << (gap * (total - row.length))    # pad short sequences with gaps
    row.gsub!(/(.{1,10})/n, " \\1")
    row.gsub!(line_regexp, "\\1\n")
    pieces = row.split(/^/)
    first_line = label + pieces.shift
    # continuation lines are indented instead of repeating the name
    pieces.collect { |piece| indent + piece }.unshift(first_line)
  end
  [ header, rows, (total + seqwidth - 1).div(seqwidth) ]
end
1149
+
1150
# Generates Molphy alignment format text as a string.
# options:: hash; recognized keys are
#   :replace_space   - replace whitespace in names with '_' (off unless set)
#   :escape          - replace : ; , ( ) in names with '_' (on unless set false)
#   :split           - keep only the first whitespace-separated word of
#                      each name (on unless set false)
#   :avoid_same_name - run names through __clustal_avoid_same_name(names, 30)
#                      (on unless set false)
#   :width           - residues per line (default 60)
#   :gap_char        - gap character (default '-')
#   :case            - string matching /lower/i or /upper/i changes case
def output_molphy(options = {})
  total = alignment_length
  header = "#{number_of_sequences} #{total}\n"
  # options that are enabled unless explicitly given a false value
  enabled = lambda { |key| !options.has_key?(key) || options[key] }

  labels = sequence_names.collect { |nm| nm.to_s.gsub(/[\r\n\x00]/, ' ') }
  labels.collect! { |nm| nm.gsub(/\s/, '_') } if options[:replace_space]
  labels.collect! { |nm| nm.gsub(/[\:\;\,\(\)]/, '_') } if enabled.call(:escape)
  labels.collect! { |nm| nm.split(/\s/)[0].to_s } if enabled.call(:split)
  labels = __clustal_avoid_same_name(labels, 30) if enabled.call(:avoid_same_name)

  seqwidth = options[:width] || 60
  line_regexp = Regexp.new("(.{1,#{seqwidth}})")
  gap = options[:gap_char] || '-'

  rows = []
  each_seq { |seq| rows << seq.to_s.gsub(gap_regexp, gap) }
  case options[:case].to_s
  when /lower/i then rows.each { |row| row.downcase! }
  when /upper/i then rows.each { |row| row.upcase! }
  end

  records = rows.collect do |row|
    row << (gap * (total - row.length))    # pad short sequences with gaps
    row.gsub!(line_regexp, "\\1\n")
    labels.shift + "\n" + row
  end
  header + records.join('')
end
1191
+
1192
# Generates msf formatted text as a string.
# options:: hash; recognized keys are
#   :avoid_same_name - use __clustal_avoid_same_name for names
#                      (on unless set false)
#   :replace_space   - replace whitespace in names with '_' (on unless set false)
#   :escape          - replace : ; , ( ) in names with '_' (on unless set false)
#   :split           - keep the first whitespace-separated word of each name
#                      (on unless set false)
#   :gap_char        - internal gap character (default '.')
#   :padding_char    - character for leading gaps and end padding (default '~')
#   :case            - string matching /lower/i downcases; anything else upcases
#   :type            - /protein/i or /aa/i => protein, /na/i => nucleic acid;
#                      otherwise decided from seqclass, then from the data
#   :entry_id        - name shown in the header (default "<object id>.msf")
#   :time            - Time shown in the header (default Time.now)
def output_msf(options = {})
  total = seq_length
  # options that are enabled unless explicitly given a false value
  enabled = lambda { |key| !options.has_key?(key) || options[key] }

  labels =
    if enabled.call(:avoid_same_name)
      __clustal_avoid_same_name(sequence_names)
    else
      sequence_names.collect { |nm| nm.to_s.gsub(/[\r\n\x00]/, ' ') }
    end
  labels.collect! { |nm| nm.gsub(/\s/, '_') } if enabled.call(:replace_space)
  labels.collect! { |nm| nm.gsub(/[\:\;\,\(\)]/, '_') } if enabled.call(:escape)
  labels.collect! { |nm| nm.split(/\s/)[0].to_s } if enabled.call(:split)

  seqwidth = 50
  namewidth = [31, labels.collect { |nm| nm.length }.max ].min
  sep = ' ' * 2

  line_regexp = Regexp.new("(.{1,#{seqwidth}})")
  gap = options[:gap_char] || '.'
  pad = options[:padding_char] || '~'

  rows = []
  each_seq { |seq| rows << seq.to_s.gsub(gap_regexp, gap) }
  rows.each do |row|
    # leading gaps become padding; trailing gaps are stripped and re-padded
    row.sub!(/\A#{Regexp.escape(gap)}+/) { |run| pad * run.length }
    row.sub!(/#{Regexp.escape(gap)}+\z/, '')
    row << (pad * (total - row.length))
  end

  # upper case unless lower case was requested
  if /lower/i =~ options[:case].to_s
    rows.each { |row| row.downcase! }
  else
    rows.each { |row| row.upcase! }
  end

  amino =
    case options[:type].to_s
    when /protein/i, /aa/i
      true
    when /na/i
      false
    else
      if seqclass == Bio::Sequence::AA
        true
      elsif seqclass == Bio::Sequence::NA
        false
      else
        # If we can't determine the type, we assume protein unless every
        # row matches the nucleotide pattern below.
        # NOTE(review): the pattern only matches one-character rows -- confirm intent.
        undecided = rows.size
        rows.each { |row| undecided -= 1 if /\A[acgt]\z/i =~ row }
        undecided > 0
      end
    end
  seq_type = amino ? 'P' : 'N'

  fn = options[:entry_id] || (__id__.abs.to_s + '.msf')
  dt = (options[:time] || Time.now).strftime('%B %d, %Y %H:%M')

  sums = rows.collect { |row| GCG::Seq.calc_checksum(row) }
  total_sum = sums.inject(0) { |acc, value| acc + value } % 10000

  msf = [
    "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
    "\n",
    "\n",
    " #{fn} MSF: #{total} Type: #{seq_type} #{dt} Check: #{total_sum} ..\n",
    "\n"
  ]

  labels.each_with_index do |nm, idx|
    padded = sprintf('%*s', -namewidth, nm.to_s)[0, namewidth]
    msf << (' Name: ' + padded +
            " Len: #{total} Check: #{sums[idx]} Weight: 1.00\n")
  end
  msf << "\n//\n"

  # split every sequence into lines prefixed by its right-aligned name
  rows.collect! do |row|
    head = sprintf("%*s", namewidth, labels.shift.to_s)[0, namewidth] + sep
    row.gsub!(line_regexp, "\\1\n")
    row.split(/^/).collect { |piece| head + piece }
  end

  blank_head = ' ' * namewidth + sep
  left = 1
  (total + seqwidth - 1).div(seqwidth).times do
    msf << "\n"
    right = [ left + seqwidth - 1, total ].min
    ruler = blank_head + left.to_s
    if left != right
      # place the right-hand coordinate above the last column when it fits
      fill = [ right - left + 1 - left.to_s.length - right.to_s.length, 1 ].max
      ruler += ' ' * fill + right.to_s
    end
    msf << (ruler + "\n")
    rows.each { |pieces| msf << pieces.shift }
    left += seqwidth
  end
  msf << "\n"
  msf.join('')
end
1308
+
1309
+ end #module Output
1310
+
1311
+ module EnumerableExtension
1312
+ include Output
1313
+
1314
# Returns number of sequences in this alignment,
# counted by iterating with each_seq.
def number_of_sequences
  count = 0
  each_seq { count += 1 }
  count
end
1320
+
1321
# Returns an array of sequence names.
# The order of the names must be the same as
# the order of <tt>each_seq</tt>.
#
# This default implementation returns the integers
# 0, 1, ... number_of_sequences - 1.
def sequence_names
  upper = number_of_sequences
  (0...upper).to_a
end
1327
+ end #module EnumerableExtension
1328
+
1329
+ # Bio::Alignment::ArrayExtension is a set of useful methods for
1330
+ # multiple sequence alignment.
1331
+ # It is designed to be extended to array objects or
1332
+ # included in your own classes which inherit Array.
1333
+ # (It can also be included in Array, though not recommended.)
1334
+ #
1335
+ # It possesses all methods defined in EnumerableExtension.
1336
+ # For usage of methods, please refer to EnumerableExtension.
1337
+ module ArrayExtension
1338
+ include EnumerableExtension
1339
+
1340
# Iterates over each sequence, yielding it to the block.
#
# Delegates directly to each, so it works the same as Array#each.
def each_seq(&blk) #:yields: seq
  self.each(&blk)
end
1347
+
1348
# Returns number of sequences in this alignment
# (the size of the underlying array).
def number_of_sequences
  size
end
1352
+ end #module ArrayExtension
1353
+
1354
+ # Bio::Alignment::HashExtension is a set of useful methods for
1355
+ # multiple sequence alignment.
1356
+ # It is designed to be extended to hash objects or
1357
+ # included in your own classes which inherit Hash.
1358
+ # (It can also be included in Hash, though not recommended.)
1359
+ #
1360
+ # It possesses all methods defined in EnumerableExtension.
1361
+ # For usage of methods, please refer to EnumerableExtension.
1362
+ #
1363
+ # Because SequenceHash#alignment_collect is redefined,
1364
+ # some methods' return value's class are changed to
1365
+ # SequenceHash instead of SequenceArray.
1366
+ #
1367
+ # Because the order of the objects in a hash is inconstant,
1368
+ # some methods strictly affected with the order of objects
1369
+ # might not work correctly,
1370
+ # e.g. EnumerableExtension#convert_match and #convert_unmatch.
1371
+ module HashExtension
1372
+ include EnumerableExtension
1373
+
1374
# Iterates over each sequence, yielding it to the block.
#
# Keys are visited with each_key and each value is looked up
# through self[], so it works the same as Hash#each_value.
def each_seq #:yields: seq
  #each_value(&block)
  each_key do |name|
    yield self[name]
  end
end
1382
+
1383
# Yields every sequence to the block and stores each result, under the
# same key, in a new Bio::Alignment::SequenceHash, which is returned.
# The properties of this alignment (get_all_property) are copied over.
#
# Redefine this method if you want to change the class of the
# return value.
def alignment_collect
  collected = SequenceHash.new
  collected.set_all_property(get_all_property)
  each_pair { |name, seq| collected.store(name, yield(seq)) }
  collected
end
1398
+
1399
# Concatenates the given alignment.
# If <em>align</em> is a Hash (or SequenceHash),
# sequences of same keys are concatenated.
# Otherwise, <em>align</em> must have <tt>each_seq</tt>
# or <tt>each</tt> method; its sequences are appended to this
# alignment's values in order.
#
# Returns self.
#
# Note that it is a destructive method.
#
# The three protocols (each_pair, each_seq, each) are tried in turn.
# A NoMethodError or ArgumentError raised before the first element is
# seen means "protocol not supported" and the next one is tried; once
# iteration has started the error is re-raised.
def alignment_concat(align)
  started = nil
  begin
    align.each_pair do |name, seq|
      started = true
      target = self[name]
      target.concat(seq) if target
    end
    return self
  rescue NoMethodError, ArgumentError => err
    raise err if started
  end

  targets = values
  pos = 0
  begin
    align.each_seq do |seq|
      started = true
      targets[pos].concat(seq) if targets[pos] and seq
      pos += 1
    end
    return self
  rescue NoMethodError, ArgumentError => err
    raise err if started
  end

  align.each do |seq|
    targets[pos].concat(seq) if targets[pos] and seq
    pos += 1
  end
  self
end
1441
+
1442
# Returns number of sequences in this alignment
# (the number of entries in the hash).
def number_of_sequences
  size
end
1446
+
1447
# Returns an array of sequence names (the keys of the hash).
# The order of the names must be the same as
# the order of <tt>each_seq</tt>.
def sequence_names
  keys
end
1453
+ end #module HashExtension
1454
+
1455
# Bio::Alignment::SequenceArray is an Array-based container class for
# multiple sequence alignment.
# It inherits Array, so every Array method is available; including
# ArrayExtension additionally provides the methods of ArrayExtension
# and EnumerableExtension.
class SequenceArray < Array
  include ArrayExtension
end #class SequenceArray
1463
+
1464
# Bio::Alignment::SequenceHash is a Hash-based container class for
# multiple sequence alignment.
# It inherits Hash, so every Hash method is available; including
# HashExtension additionally provides the methods of HashExtension
# and EnumerableExtension.
class SequenceHash < Hash
  include HashExtension
end #class SequenceHash
1472
+
1473
# Bio::Alignment::OriginalPrivate is a set of private methods
# for Bio::Alignment::OriginalAlignment.
module OriginalPrivate

  # Gets the sequence from given object.
  #
  # Bio::Sequence::NA and Bio::Sequence::AA objects are returned as is.
  # For anything else the methods seq, naseq and aaseq are tried in
  # that order (NameError and ArgumentError count as "no value");
  # the first true value wins. When none yields a value, the object
  # itself is returned.
  def extract_seq(obj)
    return obj if obj.is_a?(Bio::Sequence::NA) || obj.is_a?(Bio::Sequence::AA)
    [ :seq, :naseq, :aaseq ].each do |meth|
      found =
        begin
          obj.send(meth)
        rescue NameError, ArgumentError
          nil
        end
      return found if found
    end
    obj
  end
  module_function :extract_seq

  # Gets the name or the definition of the sequence from given object.
  #
  # The methods definition and entry_id are tried in that order
  # (NameError and ArgumentError count as "no value"); the first true
  # value is returned.
  def extract_key(obj)
    name = nil
    [ :definition, :entry_id ].each do |meth|
      name =
        begin
          obj.send(meth)
        rescue NameError, ArgumentError
          nil
        end
      break if name
    end
    name
  end
  module_function :extract_key
end #module OriginalPrivate
1512
+
1513
+ # Bio::Alignment::OriginalAlignment is
1514
+ # the BioRuby original multiple sequence alignment container class.
1515
+ # It includes HashExtension.
1516
+ #
1517
+ # It is recommended only to use methods defined in EnumerableExtension
1518
+ # (and the each_seq method).
1519
+ # The method only defined in this class might be obsoleted in the future.
1520
+ #
1521
+ class OriginalAlignment
1522
+
1523
+ include Enumerable
1524
+ include HashExtension
1525
+ include OriginalPrivate
1526
+
1527
# Reads the given files and creates a new alignment object.
# Every file is opened with Bio::FlatFile.open(nil, filename) and its
# entries are added with add_sequences.
#
# It will be obsoleted.
def self.readfiles(*files)
  require 'bio/io/flatfile'
  alignment = new
  files.each do |path|
    Bio::FlatFile.open(nil, path) { |flatfile| alignment.add_sequences(flatfile) }
  end
  alignment
end
1540
+
1541
# Creates a new alignment object from given arguments:
# the arguments are collected into one array and passed to new.
#
# It will be obsoleted.
def self.new2(*seqs)
  new(seqs)
end
1547
+
1548
# Creates a new alignment object.
# <em>seqs</em> may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
# an alignment object.
#
# Starts with empty key and sequence stores, then hands
# <em>seqs</em> to add_sequences.
def initialize(seqs = [])
  @keys = []
  @seqs = {}
  add_sequences(seqs)
end
1558
+
1559
# If <em>x</em> is the same value, returns true.
# Otherwise, returns false.
#
# Two alignments are equal when <em>x</em> is an instance of this
# object's class and both to_hash results are equal.
def ==(x)
  #(original)
  return false unless x.is_a?(self.class)
  to_hash == x.to_hash
end
1569
+
1570
# Converts to a hash.
# Returns the internal key => sequence Hash itself (not a copy).
def to_hash
  #(Hash-like)
  @seqs
end
1575
+
1576
# Adds sequences to the alignment.
# <em>seqs</em> may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
# an alignment object.
#
# With a block, every element is yielded and the block must return
# [ sequence, key ]. Without a block, objects of this class and
# anything responding to each_pair contribute their own keys; for
# other collections sequence and key are taken with extract_seq and
# extract_key.
#
# Returns self.
def add_sequences(seqs)
  if block_given?
    seqs.each do |item|
      seq, name = yield(item)
      store(name, seq)
    end
  elsif seqs.is_a?(self.class)
    seqs.each_pair { |name, seq| store(name, seq) }
  elsif seqs.respond_to?(:each_pair)
    seqs.each_pair { |name, item| store(name, extract_seq(item)) }
  else
    seqs.each do |item|
      seq = extract_seq(item)
      store(extract_key(item), seq)
    end
  end
  self
end
1607
+
1608
+ # identifiers (or definitions or names) of the sequences
1609
+ attr_reader :keys
1610
+
1611
# Stores a sequence with the name, without any duplicate check.
# key:: name of the sequence
# seq:: sequence
#
# The key is first put into a one-element hash and the key object
# held by that hash is what gets recorded in the key list.
# Returns seq.
def __store__(key, seq)
  #(Hash-like)
  entry = { key => seq }
  recorded_key = entry.keys[0]
  @keys.push(recorded_key)
  @seqs.update(entry)
  seq
end
1621
+
1622
# Stores a sequence with <em>key</em>
# (name or definition of the sequence).
# Unlike <tt>__store__</tt> method, the method doesn't allow
# same keys.
# If the key is nil/false or already used, the sequence is still
# stored, but under an automatically chosen unused integer key
# (the existing entry keeps its key).
# Returns the key that was actually used.
def store(key, seq)
  #(Hash-like) returns key instead of seq
  # a key that is already taken is discarded; the existing one is preserved
  key = nil if @seqs.has_key?(key)
  if !key
    @serial = 0 unless defined?(@serial)
    @serial = [ @serial, @seqs.size ].max
    @serial += 1 while @seqs.has_key?(@serial)
    key = @serial
  end
  __store__(key, seq)
  key
end
1648
+
1649
# Reconstructs internal data structure.
# (Like Hash#rehash)
#
# After rehashing the sequence hash, the ordered key list is brought
# back in sync with it: keys no longer present in the hash are dropped,
# surviving keys keep their relative order, and keys that exist only in
# the hash are appended.
#
# Returns self.
def rehash
  @seqs.rehash
  unlisted = @seqs.keys
  # Array#delete returns the key when it is present in the hash and nil
  # otherwise, so stale entries turn into nil and are compacted away.
  @keys.collect! { |k| unlisted.delete(k) }
  @keys.compact!
  @keys.concat(unlisted)
  self
end
1662
+
1663
# Prepends seq (with key) to the front of the alignment.
# (Like Array#unshift)
#
# The sequence is stored with store, then the key that store appended
# to the key list is moved to the front. Returns that key.
def unshift(key, seq)
  #(Array-like)
  store(key, seq)
  newest = @keys.pop
  @keys.unshift(newest)
  newest
end
1672
+
1673
# Removes the first sequence in the alignment and
# returns [ key, seq ].
# Returns nil when there is no (true) first key.
def shift
  first_key = @keys.shift
  return nil unless first_key
  [ first_key, @seqs.delete(first_key) ]
end
1684
+
1685
# Gets the <em>n</em>-th sequence (by position in the key list).
# If not found, returns nil.
def order(n)
  #(original)
  nth_key = @keys[n]
  @seqs[nth_key]
end
1691
+
1692
# Removes the sequence whose key is <em>key</em>, from both the
# key list and the sequence hash.
# Returns the removed sequence.
# If not found, returns nil.
def delete(key)
  #(Hash-like)
  @keys.delete(key)
  removed = @seqs.delete(key)
  removed
end
1700
+
1701
# Returns sequences, in the order of the key list. (Like Hash#values)
def values
  #(Hash-like)
  @seqs.values_at(*@keys)
end
1706
+
1707
# Adds a sequence without key.
# The key is automatically determined (store is called with a nil key).
# Returns self, so calls can be chained.
def <<(seq)
  #(Array-like)
  store(nil, seq)
  self
end
1714
+
1715
# Gets a sequence. (Like Hash#[])
# All arguments are forwarded to the [] of the internal sequence hash.
def [](*key)
  #(Hash-like)
  @seqs[*key]
end
1720
+
1721
# Number of sequences in the alignment
# (the number of entries in the sequence hash).
def size
  #(Hash&Array-like)
  @seqs.length
end
1726
+ alias number_of_sequences size
1727
+
1728
# If the key exists, returns true. Otherwise, returns false.
# (Like Hash#has_key?)
def has_key?(key)
  #(Hash-like)
  @seqs.key?(key)
end
1734
+
1735
# Iterates over each sequence, in the order of the key list.
# (Like Array#each)
def each
  #(Array-like)
  @keys.each { |name| yield @seqs[name] }
end
1743
+ alias each_seq each
1744
+
1745
# Iterates over each key and sequence, in the order of the key list.
# (Like Hash#each_pair)
def each_pair
  #(Hash-like)
  @keys.each { |name| yield name, @seqs[name] }
end
1753
+
1754
# Iterates over each sequence, replacing the sequence with the
# value returned by the block. Keys and their order are unchanged.
def collect!
  #(Array-like)
  @keys.each do |name|
    replacement = yield @seqs[name]
    @seqs[name] = replacement
  end
end
1762
+
1763
+ ###--
1764
+ ### note that 'collect' and 'to_a' is defined in Enumerable
1765
+ ###
1766
+ ### instance-variable-related methods
1767
+ ###++
1768
+
1769
# Creates new alignment of the same class, built from the given
# arguments and carrying over this alignment's properties
# (get_all_property / set_all_property). Internal use only.
def new(*args)
  fresh = self.class.new(*args)
  fresh.set_all_property(get_all_property)
  fresh
end
1775
+ protected :new
1776
+
1777
# Duplicates the alignment: a new alignment is built from self
# through the instance-level new method.
def dup
  #(Hash-like)
  new(self)
end
1782
+
1783
+ #--
1784
+ # methods below should not access instance variables
1785
+ #++
1786
+
1787
# Merges given alignment(s) and returns a new alignment.
# A copy of self is made first and merge! is applied to the copy,
# so self is left untouched.
def merge(*other)
  #(Hash-like)
  merged = new(self)
  merged.merge!(*other)
  merged
end
1794
+
1795
# Merge given alignment(s) into self.
# Note that it is destructive method.
#
# Each argument must respond to each_pair. For a key that already
# exists: with a block, the block is called with
# (key, existing sequence, new sequence) and its result replaces the
# existing sequence in place; without a block, the existing entry is
# deleted and the new sequence is stored. New keys are simply stored.
#
# Returns self.
def merge!(*other)
  #(Hash-like)
  use_block = block_given?
  other.each do |aln|
    aln.each_pair do |name, seq|
      if !has_key?(name)
        store(name, seq)
      elsif use_block
        to_hash.store(name, yield(name, self[name], seq))
      else
        delete(name)
        store(name, seq)
      end
    end
  end
  self
end
1820
+
1821
# Returns the key for a given sequence. If not found, returns nil.
#
# A stored sequence matches when it is == to <em>seq</em> (same class)
# or, for objects of different classes, when their to_s forms are equal.
# The key of the first match, in each_pair order, is returned.
def index(seq)
  #(Hash-like)
  each_pair do |key, stored|
    matched =
      if stored.class == seq.class
        stored == seq
      else
        stored.to_s == seq.to_s
      end
    return key if matched
  end
  # no sequence matched (previously the last key visited leaked out here)
  nil
end
1836
+
1837
# Sequences in the alignment are duplicated.
# If keys are given to the argument, sequences of given keys are
# duplicated.
#
# Each affected sequence is replaced by seqclass.new(sequence).
# With keys, the entry is deleted and stored again, so it moves to
# the end of the alignment; unknown keys are ignored.
#
# Returns self.
#
# It will be obsoleted.
def isolate(*arg)
  #(original)
  if arg.empty?
    collect! { |s| seqclass.new(s) }
  else
    arg.each do |k|
      next unless has_key?(k)
      # delete by the loop variable k (the old code used an undefined name)
      s = delete(k)
      store(k, seqclass.new(s))
    end
  end
  self
end
1858
+
1859
# Yields every sequence to the block and stores each result, under the
# same key, in a new alignment of the same class, which is returned.
# The properties of this alignment (get_all_property) are copied over.
#
# The method name 'collect_align' will be obsoleted.
# Please use 'alignment_collect' instead.
def alignment_collect
  #(original)
  collected = self.class.new
  collected.set_all_property(get_all_property)
  each_pair { |name, seq| collected.store(name, yield(seq)) }
  collected
end
1873
+ alias collect_align alignment_collect
1874
+
1875
# Removes empty sequences or nil in the alignment.
# (Like Array#compact!)
#
# Returns an array of the removed keys, or nil if nothing was removed.
def compact!
  #(Array-like)
  # collect first, delete afterwards, so iteration is not disturbed
  doomed = []
  each_pair do |name, seq|
    doomed.push(name) if !seq or seq.empty?
  end
  doomed.each { |name| delete(name) }
  doomed.empty? ? nil : doomed
end
1890
+
1891
# Removes empty sequences or nil and returns new alignment.
# (Like Array#compact)
#
# Works on a dup of self, so self is not modified.
def compact
  #(Array-like)
  copy = dup
  copy.compact!
  copy
end
1899
+
1900
# Adds a sequence to the alignment.
#
# When <em>seq</em> is not a Bio::Sequence::NA or Bio::Sequence::AA,
# the sequence is taken from it with extract_seq and, if no
# <em>key</em> was given, the key with extract_key.
# Returns whatever store returns for the (key, sequence) pair.
#
# It resembles BioPerl's AlignI::add_seq method.
def add_seq(seq, key = nil)
  #(BioPerl) AlignI::add_seq like method
  is_bioseq = seq.is_a?(Bio::Sequence::NA) || seq.is_a?(Bio::Sequence::AA)
  if !is_bioseq
    extracted = extract_seq(seq)
    key ||= extract_key(seq)
    seq = extracted
  end
  store(key, seq)
end
1914
+
1915
# Removes given sequence from the alignment.
# The key is looked up with index; the entry is then deleted.
# Returns removed sequence. If nothing removed, returns nil.
#
# It resembles BioPerl's AlignI::remove_seq.
def remove_seq(seq)
  #(BioPerl) AlignI::remove_seq like method
  found_key = index(seq)
  found_key ? delete(found_key) : nil
end
1927
+
1928
# Removes sequences from the alignment by given keys.
# Returns an alignment object consists of removed sequences.
# Keys without a (true) sequence in this alignment are skipped.
#
# It resembles BioPerl's AlignI::purge method.
def purge(*arg)
  #(BioPerl) AlignI::purge like method
  removed = new
  arg.each do |name|
    next unless self[name]
    removed.store(name, delete(name))
  end
  removed
end
1942
+
1943
# If block is given, it acts like Array#select (Enumerable#select).
# Returns a new alignment containing all sequences of the alignment
# for which return value of given block is not false nor nil.
# Any arguments are ignored in that case.
#
# If no block is given, it acts like the BioPerl's AlignI::select.
# Returns a new alignment containing sequences of given keys
# (keys without a sequence are skipped).
#
# The BioPerl's AlignI::select-like action will be obsoleted.
def select(*arg)
  #(original)
  na = new
  if block_given?
    # Pass the block straight to each_pair: each_pair yields
    # unconditionally and cannot be called without a block.
    each_pair do |k, s|
      na.store(k, s) if yield(s)
    end
  else
    # BioPerl's AlignI::select like function
    arg.each do |k|
      s = self[k]
      na.store(k, s) if s
    end
  end
  na
end
1970
+
1971
+ # The method name <tt>slice</tt> will be obsoleted.
1972
+ # Please use <tt>alignment_slice</tt> instead.
1973
+ alias slice alignment_slice
1974
+
1975
+ # The method name <tt>subseq</tt> will be obsoleted.
1976
+ # Please use <tt>alignment_subseq</tt> instead.
1977
+ alias subseq alignment_subseq
1978
+
1979
# Not-destructive version of alignment_normalize!.
# Returns a new alignment (a dup of self that has been normalized).
def normalize
  #(original)
  copy = dup
  copy.alignment_normalize!
  copy
end
1987
+
1988
# Not-destructive version of alignment_rstrip!.
# Returns a new alignment: self is dup'ed, the copy's sequences are
# duplicated with isolate, then alignment_rstrip! is applied to it.
def rstrip
  #(String-like)
  copy = dup
  copy.isolate
  copy.alignment_rstrip!
  copy
end
1997
+
1998
# Not-destructive version of alignment_lstrip!.
# Returns a new alignment: self is dup'ed, the copy's sequences are
# duplicated with isolate, then alignment_lstrip! is applied to it.
def lstrip
  #(String-like)
  copy = dup
  copy.isolate
  copy.alignment_lstrip!
  copy
end
2007
+
2008
# Not-destructive version of alignment_strip!.
# Returns a new alignment: self is dup'ed, the copy's sequences are
# duplicated with isolate, then alignment_strip! is applied to it.
def strip
  #(String-like)
  copy = dup
  copy.isolate
  copy.alignment_strip!
  copy
end
2017
+
2018
# Not-destructive version of remove_all_gaps!.
# Returns a new alignment: self is dup'ed, the copy's sequences are
# duplicated with isolate, then remove_all_gaps! is applied to it.
#
# The method name 'remove_gap' will be obsoleted.
# Please use 'remove_all_gaps' instead.
def remove_all_gaps
  #(original)
  copy = dup
  copy.isolate
  copy.remove_all_gaps!
  copy
end
2030
+
2031
# Concatenates a string or an alignment.
# A string-like argument (anything responding to to_str) is appended
# to every sequence and self is returned; anything else is handed to
# alignment_concat, whose result is returned.
#
# Note that the method will be obsoleted.
# Please use <tt>each_seq { |s| s << str }</tt> for concatenating
# a string and
# <tt>alignment_concat(aln)</tt> for concatenating an alignment.
def concat(aln)
  #(String-like)
  return alignment_concat(aln) unless aln.respond_to?(:to_str)
  each { |seq| seq << aln }
  self
end
2049
+
2050
# Replace the specified region of the alignment to aln.
# aln:: String or Bio::Alignment object
# arg:: same format as String#slice
#
# A string-like <em>aln</em> replaces the region in every sequence.
# An object of this class replaces the region of the sequences with
# the same keys. Any other collection is applied by position: its
# n-th element goes into the n-th sequence.
#
# Returns self.
#
# It will be obsoleted.
def replace_slice(aln, *arg)
  #(original)
  if aln.respond_to?(:to_str) #aln.is_a?(String)
    each { |seq| seq[*arg] = aln }
  elsif aln.is_a?(self.class)
    aln.each_pair { |name, piece| self[name][*arg] = piece }
  else
    pos = 0
    aln.each do |piece|
      order(pos)[*arg] = piece
      pos += 1
    end
  end
  self
end
2074
+
2075
# Performs multiple alignment by using external program.
# factory:: object whose query(alignment) returns a report
#           responding to alignment
#
# The sequences are sent under the temporary keys 0, 1, 2, ... and the
# result is looked up by the string form of those keys; a missing
# entry raises RuntimeError. The returned new alignment maps the
# original keys to the aligned sequences.
def do_align(factory)
  numbered = self.class.new
  (0...size).each { |idx| numbered.store(idx, order(idx)) }
  aligned = factory.query(numbered).alignment
  numbered.keys.each do |idx|
    next if aligned[idx.to_s]
    raise 'alignment result is inconsistent with input data'
  end
  result = new
  numbered.keys.each do |idx|
    result.store(keys[idx], aligned[idx.to_s])
  end
  result
end
2092
+
2093
# Convert to fasta format and returns an array of strings
# (one ">name\nsequence\n" entry per sequence).
#
# Arguments: an optional leading Integer line width, then an optional
# options hash. Recognized options:
#   :width           - line width, used when no Integer was given
#   :avoid_same_name - when true, names come from
#                      __clustal_avoid_same_name(keys, 30); otherwise
#                      CR, LF and NUL in names are replaced by a space
# Without a width each sequence is written on a single line.
#
# It will be obsoleted.
def to_fasta_array(*arg)
  #(original)
  width = arg[0].is_a?(Integer) ? arg.shift : nil
  options = arg.shift || {}
  width ||= options[:width]
  names =
    if options[:avoid_same_name]
      __clustal_avoid_same_name(keys, 30)
    else
      keys.collect { |name| name.to_s.gsub(/[\r\n\x00]/, ' ') }
    end
  collect do |seq|
    header = ">#{names.shift}\n"
    if width
      header + seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
    else
      header + seq.to_s + "\n"
    end
  end
end
2119
+
2120
# Converts to fasta format and returns an array of FastaFormat objects.
# The arguments are passed to to_fasta_array, and each resulting
# string is wrapped in Bio::FastaFormat.new.
#
# It will be obsoleted.
def to_fastaformat_array(*arg)
  #(original)
  require 'bio/db/fasta'
  entries = to_fasta_array(*arg)
  entries.collect! { |text| Bio::FastaFormat.new(text) }
  entries
end
2132
+
2133
# Converts to fasta format and returns a string
# (the entries of to_fasta_array joined together).
# A deprecation warning is printed on every call.
#
# The specification of the argument will be changed.
#
# Note: <tt>to_fasta</tt> is deprecated.
# Please use <tt>output_fasta</tt> instead.
def to_fasta(*arg)
  #(original)
  warn "to_fasta is deprecated. Please use output_fasta."
  entries = to_fasta_array(*arg)
  entries.join('')
end
2144
+
2145
+ # The method name <tt>consensus</tt> will be obsoleted.
2146
+ # Please use <tt>consensus_string</tt> instead.
2147
+ alias consensus consensus_string
2148
+ end #class OriginalAlignment
2149
+
2150
# Bio::Alignment::GAP is a set of class methods for
# gap-related position translation.
module GAP
  # position with gaps are translated into the position without gaps.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position with gaps
  #<em>gap_regexp</em>:: regular expression to specify gaps
  #
  # Counts the non-gap characters in seq[0..pos] and returns that
  # count minus one (or 0 when there are none).
  def ungapped_pos(seq, pos, gap_regexp)
    residues = seq[0..pos].gsub(gap_regexp, '').length
    residues > 0 ? residues - 1 : residues
  end
  module_function :ungapped_pos

  # position without gaps are translated into the position with gaps.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position without gaps
  #<em>gap_regexp</em>:: regular expression to specify gaps
  #
  # A position at or beyond the ungapped length is clamped to that
  # length; a negative position counts from the end of the ungapped
  # sequence.
  def gapped_pos(seq, pos, gap_regexp)
    ungapped_len = seq.gsub(gap_regexp, '').length
    pos = ungapped_len if pos >= ungapped_len
    pos = ungapped_len + pos if pos < 0

    cursor = 0
    needed = pos + 1   # residues still to be passed
    while needed > 0 and cursor < seq.length
      # look at the next window and subtract the residues it contains
      window = seq[cursor, needed]
      cursor += needed
      needed -= window.gsub(gap_regexp, '').length
    end
    cursor > 0 ? cursor - 1 : cursor
  end
  module_function :gapped_pos
end # module GAP
2185
+
2186
# Creates a new Bio::Alignment::OriginalAlignment object.
# All arguments are forwarded to OriginalAlignment.new;
# please refer to its documentation.
def self.new(*args)
  OriginalAlignment.new(*args)
end
2191
+
2192
# Creates a new Bio::Alignment::OriginalAlignment object.
# All arguments are forwarded to OriginalAlignment.new2;
# please refer to its documentation.
def self.new2(*args)
  OriginalAlignment.new2(*args)
end
2197
+
2198
# Creates a new Bio::Alignment::OriginalAlignment object.
# All file names are forwarded to OriginalAlignment.readfiles;
# please refer to its documentation.
def self.readfiles(*filenames)
  OriginalAlignment.readfiles(*filenames)
end
2203
+
2204
+ #---
2205
+ # Service classes for multiple alignment applications
2206
+ #+++
2207
+ #---
2208
+ # Templates of alignment application factory
2209
+ #+++
2210
+
2211
+ # Namespace for templates for alignment application factory
2212
+ module FactoryTemplate
2213
+
2214
# Base template for alignment application factories.
# The wrapped program is expected to behave as follows:
#   input: stdin or file, format = fasta format
#   output: stdout (parser should be specified by DEFAULT_PARSER)
class Simple

  # Creates a new alignment factory.
  # +program+ defaults to the DEFAULT_PROGRAM constant of the receiver's
  # class; +options+ is the list of command-line options.
  def initialize(program = self.class::DEFAULT_PROGRAM, options = [])
    @program = program
    @options = options
    @command = @output = @report = @exit_status = @data_stdout = nil
  end

  # Name of the program to execute.
  attr_accessor :program

  # Command-line options passed to the program.
  attr_accessor :options

  # Last command line, as nil or an array of String.
  # Filenames appearing in it may no longer exist because they may
  # have been temporary files.
  attr_reader :command

  # Raw result of the last run: a string, or nil.
  attr_reader :output

  # Result object produced by the last run of the factory.
  attr_reader :report

  # Exit status of the last run.
  attr_reader :exit_status

  # Data the last run wrote to stdout.
  attr_accessor :data_stdout

  # Forgets the results of the last run.
  # The program name and options are kept.
  def reset
    @command = @output = @report = @exit_status = @data_stdout = nil
  end

  # Runs the program.
  # With non-nil +seqs+, aligns them (see query_alignment).
  # With nil +seqs+, just executes the program with the stored options.
  #
  # Compatibility note: when +seqs+ is nil, the return value is true
  # if the exit status is 0 and false otherwise.
  def query(seqs)
    return query_alignment(seqs) if seqs

    exec_local(@options)
    @exit_status.exitstatus == 0
  end

  # Aligns +seqs+.
  # Anything that does not respond to output_fasta is first wrapped by
  # Bio::Alignment.new; the fasta text (width 70) is then handed to
  # query_string.
  def query_alignment(seqs)
    seqs = Bio::Alignment.new(seqs) unless seqs.respond_to?(:output_fasta)
    fasta = seqs.output_fasta(:width => 70)
    query_string(fasta)
  end

  # Alias of query_alignment.
  #
  # Compatibility note: query_align has been renamed to query_alignment.
  def query_align(seqs)
    # A rename warning is intentionally not emitted here.
    query_alignment(seqs)
  end

  # Aligns the sequences described by +str+, which must be text the
  # program understands. Returns the report of the run.
  def query_string(str)
    _query_string(str, @options)
    @report
  end

  # Aligns the sequences stored in the file +filename_in+.
  # Returns the report of the run.
  def query_by_filename(filename_in)
    _query_local(filename_in, @options)
    @report
  end

  private
  # Runs the program on the local machine with options +opt+,
  # feeding +data_stdin+ to it, and records the command line, the
  # captured stdout and the exit status.
  def exec_local(opt, data_stdin = nil)
    @exit_status = nil
    @command = [ @program, *opt ]
    @data_stdout = Bio::Command.query_command(@command, data_stdin)
    @exit_status = $?
  end

  # Creates a closed (but not unlinked) temporary file.
  # When +str+ is given it is written to the file and the file name
  # prefix marks it as input; otherwise the prefix marks it as output.
  def _prepare_tempfile(str = nil)
    prefix = str ? 'alignment_i' : 'alignment_o'
    tempfile = Tempfile.open(prefix)
    tempfile.print str if str
    tempfile.close(false)
    tempfile
  end

  # Builds the complete option list: +options+ followed by the input
  # and output specifications.
  # A nil file name means stdin (for input) or stdout (for output).
  # +options+ itself must not contain file names.
  # Returns an array of strings.
  def _generate_options(infile, outfile, options)
    with_input = options +
      (infile ? _option_input_file(infile) : _option_input_stdin)
    with_input +
      (outfile ? _option_output_file(outfile) : _option_output_stdout)
  end

  # Options that make the program read from the file +fn+.
  # Returns an array of strings.
  def _option_input_file(fn)
    [ fn ]
  end

  # Options that make the program write to the file +fn+.
  # This template always raises.
  def _option_output_file(fn)
    raise 'can not specify output file: always stdout'
  end

  # Options that make the program read from stdin.
  # Returns an array of strings.
  def _option_input_stdin
    []
  end

  # Options that make the program write to stdout.
  # Returns an array of strings.
  def _option_output_stdout
    []
  end
end #class Simple
2362
+
2363
# Mix-in for factories whose program takes its input from stdin.
module WrapInputStdin
  private
  # Aligns the sequences described by +str+ by passing the text as
  # stdin data to _query_local (no input file name is given).
  # +str+ must be text the program understands.
  def _query_string(str, opt)
    no_input_file = nil
    _query_local(no_input_file, opt, str)
  end
end #module WrapInputStdin
2372
+
2373
# Mix-in for factories whose program takes its input from a file:
# the query text is written to a temporary file first.
module WrapInputTempfile
  private
  # Aligns the sequences described by +str+.
  # +str+ must be text the program understands. It is stored in a
  # temporary file whose path is handed to _query_local; the file is
  # closed and removed afterwards, even when an error is raised.
  def _query_string(str, opt)
    input_file = _prepare_tempfile(str)
    _query_local(input_file.path, opt, nil)
  ensure
    input_file.close(true) if input_file
  end
end #module WrapInputTempfile
2388
+
2389
# Mix-in for factories whose program writes its result to stdout.
module WrapOutputStdout
  private
  # Runs the alignment for the input file +fn_in+ (nil means stdin,
  # with +data_stdin+ as the data) and parses the captured stdout with
  # the DEFAULT_PARSER of the receiver's class.
  # Returns the resulting report.
  def _query_local(fn_in, opt, data_stdin = nil)
    full_options = _generate_options(fn_in, nil, opt)
    exec_local(full_options, data_stdin)
    @output = @data_stdout
    @report = self.class::DEFAULT_PARSER.new(@output)
  end
end #module WrapOutputStdout
2401
+
2402
# Mix-in for factories whose program writes its result to a file:
# a temporary file receives the output and is read back.
module WrapOutputTempfile
  private
  # Runs the alignment for the input file +fn_in+ (nil means stdin,
  # with +data_stdin+ as the data), directing the program's output to
  # a temporary file. The file content becomes @output and is parsed
  # with the DEFAULT_PARSER of the receiver's class.
  # Returns the resulting report.
  def _query_local(fn_in, opt, data_stdin = nil)
    begin
      result_file = _prepare_tempfile()
      opt = _generate_options(fn_in, result_file.path, opt)
      exec_local(opt, data_stdin)
      # Reopen the temporary file to read what the program wrote.
      result_file.open
      @output = result_file.read
    ensure
      # Close and remove the temporary file, even after an error.
      result_file.close(true) if result_file
    end
    @report = self.class::DEFAULT_PARSER.new(@output)
  end
end #module WrapOutputTempfile
2420
+
2421
# Template for alignment application factories.
# The wrapped program is expected to behave as follows:
#   input: file (cannot accept stdin), format = fasta format
#   output: stdout (parser should be specified by DEFAULT_PARSER)
class FileInStdoutOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputTempfile
  include Bio::Alignment::FactoryTemplate::WrapOutputStdout

  private
  # Reading from stdin is not available in this template,
  # so asking for the stdin options always raises.
  def _option_input_stdin
    raise RuntimeError, 'input is always a file'
  end
end #class FileInStdoutOut
2436
+
2437
# Template for alignment application factories.
# The wrapped program is expected to behave as follows:
#   input: stdin or file, format = fasta format
#   output: file (parser should be specified by DEFAULT_PARSER)
class StdinInFileOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputStdin
  include Bio::Alignment::FactoryTemplate::WrapOutputTempfile

  private
  # Writing to stdout is not available in this template,
  # so asking for the stdout options always raises.
  def _option_output_stdout
    raise RuntimeError, 'output is always a file'
  end
end #class StdinInFileOut
2452
+
2453
# Template for alignment application factories.
# The wrapped program is expected to behave as follows:
#   input: file (cannot accept stdin), format = fasta format
#   output: file (parser should be specified by DEFAULT_PARSER)
class FileInFileOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputTempfile
  include Bio::Alignment::FactoryTemplate::WrapOutputTempfile

  private
  # Reading from stdin is not available in this template,
  # so asking for the stdin options always raises.
  def _option_input_stdin
    raise RuntimeError, 'input is always a file'
  end

  # Writing to stdout is not available in this template,
  # so asking for the stdout options always raises.
  def _option_output_stdout
    raise RuntimeError, 'output is always a file'
  end
end #class FileInFileOut
2474
+
2475
# Template class for alignment application factory.
# The program needs:
#   input: file (cannot accept stdin), format = fasta format
#   output: file (parser should be specified by DEFAULT_PARSER)
# Tree (*.dnd) output is also supported.
class FileInFileOutWithTree < FileInFileOut

  # alignment guide tree generated by the program (*.dnd file),
  # as read back from the temporary tree file after the last run.
  attr_reader :output_dnd

  # Clears the stored guide tree, then the inherited status.
  def reset
    @output_dnd = nil
    super
  end

  private
  # Performs alignment.
  # A temporary file is prepared for the guide tree and the options
  # naming it (from _option_output_dndfile) are appended to +opt+
  # before delegating to the inherited implementation. Afterwards the
  # tree file content is stored in @output_dnd. The temporary file is
  # closed and removed even when an error is raised.
  # Returns whatever the inherited _query_local returns.
  def _query_local(fn_in, opt, data_stdin = nil)
    begin
      tf_dnd = _prepare_tempfile()
      opt = opt + _option_output_dndfile(tf_dnd.path)
      ret = super(fn_in, opt, data_stdin)
      tf_dnd.open
      @output_dnd = tf_dnd.read
    ensure
      tf_dnd.close(true) if tf_dnd
    end
    ret
  end

  # generates options specifying output tree file (*.dnd).
  # +fn+ is the path of the tree file; _query_local always passes it.
  # It is optional only to stay compatible with the former
  # zero-argument signature.
  # returns an array of string.
  #
  # This template has no default: subclasses must override the method.
  # Accepting the argument ensures that a missing override is reported
  # as NotImplementedError rather than as an ArgumentError about arity.
  def _option_output_dndfile(fn = nil)
    raise NotImplementedError,
          "#{self.class} must implement _option_output_dndfile"
  end
end #class FileInFileOutWithTree
2511
+
2512
+ end #module FactoryTemplate
2513
+
2514
+
2515
+ end #module Alignment
2516
+
2517
+ end #module Bio
2518
+