wwood-bioruby 1.2.11
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +205 -0
- data/README_DEV.rdoc +285 -0
- data/VERSION.yml +4 -0
- data/bin/bioruby +44 -0
- data/bin/br_biofetch.rb +47 -0
- data/bin/br_bioflat.rb +293 -0
- data/bin/br_biogetseq.rb +45 -0
- data/bin/br_pmfetch.rb +421 -0
- data/lib/bio.rb +306 -0
- data/lib/bio/alignment.rb +2518 -0
- data/lib/bio/appl/bl2seq/report.rb +334 -0
- data/lib/bio/appl/blast.rb +505 -0
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +1438 -0
- data/lib/bio/appl/blast/format8.rb +83 -0
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +105 -0
- data/lib/bio/appl/blast/report.rb +767 -0
- data/lib/bio/appl/blast/rexml.rb +144 -0
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +635 -0
- data/lib/bio/appl/blast/xmlparser.rb +236 -0
- data/lib/bio/appl/blat/report.rb +530 -0
- data/lib/bio/appl/clustalw.rb +219 -0
- data/lib/bio/appl/clustalw/report.rb +152 -0
- data/lib/bio/appl/emboss.rb +203 -0
- data/lib/bio/appl/fasta.rb +235 -0
- data/lib/bio/appl/fasta/format10.rb +325 -0
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +552 -0
- data/lib/bio/appl/hmmer.rb +126 -0
- data/lib/bio/appl/hmmer/report.rb +683 -0
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +259 -0
- data/lib/bio/appl/mafft/report.rb +226 -0
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/phylip/alignment.rb +133 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +548 -0
- data/lib/bio/appl/psort/report.rb +542 -0
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +124 -0
- data/lib/bio/appl/sim4/report.rb +485 -0
- data/lib/bio/appl/sosui/report.rb +151 -0
- data/lib/bio/appl/spidey/report.rb +593 -0
- data/lib/bio/appl/targetp/report.rb +267 -0
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +231 -0
- data/lib/bio/command.rb +593 -0
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/data/aa.rb +353 -0
- data/lib/bio/data/codontable.rb +722 -0
- data/lib/bio/data/na.rb +223 -0
- data/lib/bio/db.rb +329 -0
- data/lib/bio/db/aaindex.rb +357 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +352 -0
- data/lib/bio/db/embl/embl.rb +500 -0
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +1283 -0
- data/lib/bio/db/embl/swissprot.rb +42 -0
- data/lib/bio/db/embl/trembl.rb +41 -0
- data/lib/bio/db/embl/uniprot.rb +42 -0
- data/lib/bio/db/fantom.rb +597 -0
- data/lib/bio/db/fasta.rb +410 -0
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +307 -0
- data/lib/bio/db/genbank/ddbj.rb +22 -0
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +250 -0
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/genbank/genpept.rb +60 -0
- data/lib/bio/db/genbank/refseq.rb +18 -0
- data/lib/bio/db/gff.rb +1846 -0
- data/lib/bio/db/go.rb +481 -0
- data/lib/bio/db/kegg/brite.rb +41 -0
- data/lib/bio/db/kegg/compound.rb +131 -0
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +148 -0
- data/lib/bio/db/kegg/expression.rb +155 -0
- data/lib/bio/db/kegg/genes.rb +263 -0
- data/lib/bio/db/kegg/genome.rb +241 -0
- data/lib/bio/db/kegg/glycan.rb +166 -0
- data/lib/bio/db/kegg/keggtab.rb +357 -0
- data/lib/bio/db/kegg/kgml.rb +256 -0
- data/lib/bio/db/kegg/orthology.rb +136 -0
- data/lib/bio/db/kegg/reaction.rb +82 -0
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +107 -0
- data/lib/bio/db/medline.rb +326 -0
- data/lib/bio/db/nbrf.rb +191 -0
- data/lib/bio/db/newick.rb +658 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +29 -0
- data/lib/bio/db/pdb/atom.rb +77 -0
- data/lib/bio/db/pdb/chain.rb +210 -0
- data/lib/bio/db/pdb/chemicalcomponent.rb +224 -0
- data/lib/bio/db/pdb/model.rb +148 -0
- data/lib/bio/db/pdb/pdb.rb +1911 -0
- data/lib/bio/db/pdb/residue.rb +176 -0
- data/lib/bio/db/pdb/utils.rb +399 -0
- data/lib/bio/db/prosite.rb +597 -0
- data/lib/bio/db/rebase.rb +456 -0
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +375 -0
- data/lib/bio/db/url.rb +42 -0
- data/lib/bio/feature.rb +139 -0
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +461 -0
- data/lib/bio/io/dbget.rb +194 -0
- data/lib/bio/io/ddbjxml.rb +638 -0
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +163 -0
- data/lib/bio/io/fetch.rb +195 -0
- data/lib/bio/io/flatfile.rb +482 -0
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/bdb.rb +253 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +1381 -0
- data/lib/bio/io/flatfile/indexer.rb +805 -0
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/higet.rb +73 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +805 -0
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +307 -0
- data/lib/bio/io/registry.rb +292 -0
- data/lib/bio/io/soapwsdl.rb +119 -0
- data/lib/bio/io/sql.rb +186 -0
- data/lib/bio/location.rb +867 -0
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +960 -0
- data/lib/bio/reference.rb +602 -0
- data/lib/bio/sequence.rb +456 -0
- data/lib/bio/sequence/aa.rb +152 -0
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +310 -0
- data/lib/bio/sequence/compat.rb +123 -0
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +358 -0
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +491 -0
- data/lib/bio/shell.rb +44 -0
- data/lib/bio/shell/core.rb +578 -0
- data/lib/bio/shell/demo.rb +146 -0
- data/lib/bio/shell/interface.rb +218 -0
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +71 -0
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +218 -0
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +105 -0
- data/lib/bio/shell/plugin/flatfile.rb +101 -0
- data/lib/bio/shell/plugin/keggapi.rb +181 -0
- data/lib/bio/shell/plugin/midi.rb +430 -0
- data/lib/bio/shell/plugin/obda.rb +45 -0
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +247 -0
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +368 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +26 -0
- data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +102 -0
- data/lib/bio/tree.rb +852 -0
- data/lib/bio/util/color_scheme.rb +191 -0
- data/lib/bio/util/color_scheme/buried.rb +59 -0
- data/lib/bio/util/color_scheme/helix.rb +59 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +64 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +31 -0
- data/lib/bio/util/color_scheme/strand.rb +59 -0
- data/lib/bio/util/color_scheme/taylor.rb +50 -0
- data/lib/bio/util/color_scheme/turn.rb +59 -0
- data/lib/bio/util/color_scheme/zappo.rb +50 -0
- data/lib/bio/util/contingency_table.rb +370 -0
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +200 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +288 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/blast/b0002.faa +15 -0
- data/test/data/blast/b0002.faa.m0 +128 -0
- data/test/data/blast/b0002.faa.m7 +65 -0
- data/test/data/blast/b0002.faa.m8 +1 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +229 -0
- data/test/functional/bio/io/test_soapwsdl.rb +52 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +14 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +1135 -0
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/genscan/test_report.rb +182 -0
- data/test/unit/bio/appl/hmmer/test_report.rb +342 -0
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/sosui/test_report.rb +81 -0
- data/test/unit/bio/appl/targetp/test_report.rb +146 -0
- data/test/unit/bio/appl/test_blast.rb +277 -0
- data/test/unit/bio/appl/test_fasta.rb +130 -0
- data/test/unit/bio/appl/test_psort.rb +57 -0
- data/test/unit/bio/appl/test_pts1.rb +77 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +126 -0
- data/test/unit/bio/data/test_aa.rb +90 -0
- data/test/unit/bio/data/test_codontable.rb +107 -0
- data/test/unit/bio/data/test_na.rb +80 -0
- data/test/unit/bio/db/embl/test_common.rb +117 -0
- data/test/unit/bio/db/embl/test_embl.rb +214 -0
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1812 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +31 -0
- data/test/unit/bio/db/kegg/test_genes.rb +45 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +152 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/db/test_fasta.rb +250 -0
- data/test/unit/bio/db/test_gff.rb +1190 -0
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_newick.rb +293 -0
- data/test/unit/bio/db/test_nexus.rb +364 -0
- data/test/unit/bio/db/test_prosite.rb +1437 -0
- data/test/unit/bio/db/test_rebase.rb +101 -0
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/db/test_url.rb +36 -0
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +80 -0
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +42 -0
- data/test/unit/bio/io/test_flatfile.rb +505 -0
- data/test/unit/bio/io/test_soapwsdl.rb +32 -0
- data/test/unit/bio/sequence/test_aa.rb +115 -0
- data/test/unit/bio/sequence/test_common.rb +373 -0
- data/test/unit/bio/sequence/test_compat.rb +69 -0
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +185 -0
- data/test/unit/bio/test_alignment.rb +1025 -0
- data/test/unit/bio/test_command.rb +349 -0
- data/test/unit/bio/test_db.rb +96 -0
- data/test/unit/bio/test_feature.rb +144 -0
- data/test/unit/bio/test_location.rb +599 -0
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +499 -0
- data/test/unit/bio/test_reference.rb +252 -0
- data/test/unit/bio/test_sequence.rb +329 -0
- data/test/unit/bio/test_shell.rb +18 -0
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +101 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +33 -0
- data/test/unit/bio/util/test_contingency_table.rb +94 -0
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +245 -0
- metadata +543 -0
data/lib/bio.rb
ADDED
@@ -0,0 +1,306 @@
|
|
1
|
+
#
|
2
|
+
# = bio.rb - Loading all BioRuby modules
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2001-2007
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
|
11
|
+
module Bio
|
12
|
+
|
13
|
+
BIORUBY_VERSION = [1, 3, 0].extend(Comparable)
|
14
|
+
|
15
|
+
### Basic data types
|
16
|
+
|
17
|
+
## Sequence
|
18
|
+
|
19
|
+
autoload :Sequence, 'bio/sequence'
|
20
|
+
## below are described in bio/sequence.rb
|
21
|
+
#class Sequence
|
22
|
+
# autoload :Common, 'bio/sequence/common'
|
23
|
+
# autoload :NA, 'bio/sequence/na'
|
24
|
+
# autoload :AA, 'bio/sequence/aa'
|
25
|
+
# autoload :Generic, 'bio/sequence/generic'
|
26
|
+
# autoload :Format, 'bio/sequence/format'
|
27
|
+
# autoload :Adapter, 'bio/sequence/adapter'
|
28
|
+
#end
|
29
|
+
|
30
|
+
## Locations/Location
|
31
|
+
|
32
|
+
autoload :Location, 'bio/location'
|
33
|
+
autoload :Locations, 'bio/location'
|
34
|
+
|
35
|
+
## Features/Feature
|
36
|
+
|
37
|
+
autoload :Feature, 'bio/feature'
|
38
|
+
autoload :Features, 'bio/compat/features'
|
39
|
+
|
40
|
+
## References/Reference
|
41
|
+
|
42
|
+
autoload :Reference, 'bio/reference'
|
43
|
+
autoload :References, 'bio/compat/references'
|
44
|
+
|
45
|
+
## Pathway/Relation
|
46
|
+
|
47
|
+
autoload :Pathway, 'bio/pathway'
|
48
|
+
autoload :Relation, 'bio/pathway'
|
49
|
+
|
50
|
+
## Alignment
|
51
|
+
|
52
|
+
autoload :Alignment, 'bio/alignment'
|
53
|
+
|
54
|
+
## Tree
|
55
|
+
autoload :Tree, 'bio/tree'
|
56
|
+
|
57
|
+
## Map
|
58
|
+
autoload :Map, 'bio/map'
|
59
|
+
|
60
|
+
### Constants
|
61
|
+
|
62
|
+
autoload :NucleicAcid, 'bio/data/na'
|
63
|
+
autoload :AminoAcid, 'bio/data/aa'
|
64
|
+
autoload :CodonTable, 'bio/data/codontable'
|
65
|
+
|
66
|
+
|
67
|
+
### DB parsers
|
68
|
+
|
69
|
+
autoload :DB, 'bio/db'
|
70
|
+
autoload :NCBIDB, 'bio/db'
|
71
|
+
autoload :KEGGDB, 'bio/db'
|
72
|
+
autoload :EMBLDB, 'bio/db'
|
73
|
+
|
74
|
+
|
75
|
+
### URL Generators
|
76
|
+
|
77
|
+
autoload :URLGenerators, 'bio/db/url'
|
78
|
+
autoload :URL, 'bio/db/url'
|
79
|
+
|
80
|
+
## GenBank/RefSeq/DDBJ
|
81
|
+
|
82
|
+
autoload :GenBank, 'bio/db/genbank/genbank'
|
83
|
+
autoload :GenPept, 'bio/db/genbank/genpept'
|
84
|
+
autoload :RefSeq, 'bio/db/genbank/refseq'
|
85
|
+
autoload :DDBJ, 'bio/db/genbank/ddbj'
|
86
|
+
## below are described in bio/db/genbank/ddbj.rb
|
87
|
+
#class DDBJ
|
88
|
+
# autoload :XML, 'bio/io/ddbjxml'
|
89
|
+
#end
|
90
|
+
|
91
|
+
## EMBL/TrEMBL/Swiss-Prot/SPTR
|
92
|
+
|
93
|
+
autoload :EMBL, 'bio/db/embl/embl'
|
94
|
+
autoload :SPTR, 'bio/db/embl/sptr'
|
95
|
+
autoload :TrEMBL, 'bio/db/embl/trembl'
|
96
|
+
autoload :UniProt, 'bio/db/embl/uniprot'
|
97
|
+
autoload :SwissProt, 'bio/db/embl/swissprot'
|
98
|
+
|
99
|
+
## KEGG
|
100
|
+
|
101
|
+
class KEGG
|
102
|
+
autoload :GENOME, 'bio/db/kegg/genome'
|
103
|
+
autoload :GENES, 'bio/db/kegg/genes'
|
104
|
+
autoload :ENZYME, 'bio/db/kegg/enzyme'
|
105
|
+
autoload :COMPOUND, 'bio/db/kegg/compound'
|
106
|
+
autoload :DRUG, 'bio/db/kegg/drug'
|
107
|
+
autoload :GLYCAN, 'bio/db/kegg/glycan'
|
108
|
+
autoload :REACTION, 'bio/db/kegg/reaction'
|
109
|
+
autoload :BRITE, 'bio/db/kegg/brite'
|
110
|
+
autoload :CELL, 'bio/db/kegg/cell'
|
111
|
+
autoload :EXPRESSION, 'bio/db/kegg/expression'
|
112
|
+
autoload :ORTHOLOGY, 'bio/db/kegg/orthology'
|
113
|
+
autoload :KGML, 'bio/db/kegg/kgml'
|
114
|
+
autoload :Taxonomy, 'bio/db/kegg/taxonomy'
|
115
|
+
end
|
116
|
+
|
117
|
+
## other formats
|
118
|
+
|
119
|
+
autoload :FastaFormat, 'bio/db/fasta'
|
120
|
+
autoload :FastaNumericFormat, 'bio/db/fasta' # change to FastaFormat::Numeric ?
|
121
|
+
autoload :FastaDefline, 'bio/db/fasta' # change to FastaFormat::Defline
|
122
|
+
autoload :GFF, 'bio/db/gff'
|
123
|
+
autoload :AAindex, 'bio/db/aaindex'
|
124
|
+
autoload :AAindex1, 'bio/db/aaindex' # change to AAindex::AAindex1 ?
|
125
|
+
autoload :AAindex2, 'bio/db/aaindex' # change to AAindex::AAindex2 ?
|
126
|
+
autoload :TRANSFAC, 'bio/db/transfac'
|
127
|
+
autoload :PROSITE, 'bio/db/prosite'
|
128
|
+
autoload :LITDB, 'bio/db/litdb'
|
129
|
+
autoload :MEDLINE, 'bio/db/medline'
|
130
|
+
autoload :FANTOM, 'bio/db/fantom'
|
131
|
+
autoload :GO, 'bio/db/go'
|
132
|
+
autoload :PDB, 'bio/db/pdb'
|
133
|
+
autoload :NBRF, 'bio/db/nbrf'
|
134
|
+
autoload :REBASE, 'bio/db/rebase'
|
135
|
+
autoload :SOFT, 'bio/db/soft'
|
136
|
+
autoload :Lasergene, 'bio/db/lasergene'
|
137
|
+
|
138
|
+
autoload :Newick, 'bio/db/newick'
|
139
|
+
autoload :Nexus, 'bio/db/nexus'
|
140
|
+
|
141
|
+
### IO interface modules
|
142
|
+
|
143
|
+
autoload :Registry, 'bio/io/registry'
|
144
|
+
autoload :Fetch, 'bio/io/fetch'
|
145
|
+
autoload :SQL, 'bio/io/sql'
|
146
|
+
autoload :SOAPWSDL, 'bio/io/soapwsdl'
|
147
|
+
autoload :FlatFile, 'bio/io/flatfile'
|
148
|
+
autoload :FlatFileIndex, 'bio/io/flatfile/index' # change to FlatFile::Index ?
|
149
|
+
## below are described in bio/io/flatfile/index.rb
|
150
|
+
#class FlatFileIndex
|
151
|
+
# autoload :Indexer, 'bio/io/flatfile/indexer'
|
152
|
+
# autoload :BDBdefault, 'bio/io/flatfile/bdb'
|
153
|
+
# autoload :BDBwrapper, 'bio/io/flatfile/bdb'
|
154
|
+
# autoload :BDB_1, 'bio/io/flatfile/bdb'
|
155
|
+
#end
|
156
|
+
|
157
|
+
autoload :PubMed, 'bio/io/pubmed'
|
158
|
+
autoload :DAS, 'bio/io/das'
|
159
|
+
autoload :DBGET, 'bio/io/dbget'
|
160
|
+
|
161
|
+
autoload :Ensembl, 'bio/io/ensembl'
|
162
|
+
autoload :Hinv, 'bio/io/hinv'
|
163
|
+
|
164
|
+
## below are described in bio/appl/blast.rb
|
165
|
+
#class Blast
|
166
|
+
# autoload :Fastacmd, 'bio/io/fastacmd'
|
167
|
+
#end
|
168
|
+
|
169
|
+
class KEGG
|
170
|
+
autoload :API, 'bio/io/keggapi'
|
171
|
+
end
|
172
|
+
|
173
|
+
## below are described in bio/db/genbank/ddbj.rb
|
174
|
+
#class DDBJ
|
175
|
+
# autoload :XML, 'bio/io/ddbjxml'
|
176
|
+
#end
|
177
|
+
|
178
|
+
class HGC
|
179
|
+
autoload :HiGet, 'bio/io/higet'
|
180
|
+
end
|
181
|
+
|
182
|
+
class EBI
|
183
|
+
autoload :SOAP, 'bio/io/ebisoap'
|
184
|
+
end
|
185
|
+
|
186
|
+
class NCBI
|
187
|
+
autoload :SOAP, 'bio/io/ncbisoap'
|
188
|
+
autoload :REST, 'bio/io/ncbirest'
|
189
|
+
end
|
190
|
+
|
191
|
+
|
192
|
+
### Applications
|
193
|
+
|
194
|
+
autoload :Fasta, 'bio/appl/fasta'
|
195
|
+
## below are described in bio/appl/fasta.rb
|
196
|
+
#class Fasta
|
197
|
+
# autoload :Report, 'bio/appl/fasta/format10'
|
198
|
+
#end
|
199
|
+
|
200
|
+
autoload :Blast, 'bio/appl/blast'
|
201
|
+
## below are described in bio/appl/blast.rb
|
202
|
+
#class Blast
|
203
|
+
# autoload :Fastacmd, 'bio/io/fastacmd'
|
204
|
+
# autoload :Report, 'bio/appl/blast/report'
|
205
|
+
# autoload :Default, 'bio/appl/blast/format0'
|
206
|
+
# autoload :WU, 'bio/appl/blast/wublast'
|
207
|
+
# autoload :Bl2seq, 'bio/appl/bl2seq/report'
|
208
|
+
# autoload :RPSBlast, 'bio/appl/blast/rpsblast'
|
209
|
+
# autoload :NCBIOptions, 'bio/appl/blast/ncbioptions'
|
210
|
+
# autoload :Remote, 'bio/appl/blast/remote'
|
211
|
+
#end
|
212
|
+
|
213
|
+
autoload :HMMER, 'bio/appl/hmmer'
|
214
|
+
## below are described in bio/appl/hmmer.rb
|
215
|
+
#class HMMER
|
216
|
+
# autoload :Report, 'bio/appl/hmmer/report'
|
217
|
+
#end
|
218
|
+
|
219
|
+
autoload :EMBOSS, 'bio/appl/emboss' # use bio/command, improve
|
220
|
+
|
221
|
+
autoload :PSORT, 'bio/appl/psort'
|
222
|
+
## below are described in bio/appl/psort.rb
|
223
|
+
#class PSORT
|
224
|
+
# class PSORT1
|
225
|
+
# autoload :Report, 'bio/appl/psort/report'
|
226
|
+
# end
|
227
|
+
# class PSORT2
|
228
|
+
# autoload :Report, 'bio/appl/psort/report'
|
229
|
+
# end
|
230
|
+
#end
|
231
|
+
|
232
|
+
autoload :TMHMM, 'bio/appl/tmhmm/report'
|
233
|
+
autoload :TargetP, 'bio/appl/targetp/report'
|
234
|
+
autoload :SOSUI, 'bio/appl/sosui/report'
|
235
|
+
autoload :Genscan, 'bio/appl/genscan/report'
|
236
|
+
|
237
|
+
autoload :ClustalW, 'bio/appl/clustalw'
|
238
|
+
## below are described in bio/appl/clustalw.rb
|
239
|
+
#class ClustalW
|
240
|
+
# autoload :Report, 'bio/appl/clustalw/report'
|
241
|
+
#end
|
242
|
+
|
243
|
+
autoload :MAFFT, 'bio/appl/mafft'
|
244
|
+
## below are described in bio/appl/mafft.rb
|
245
|
+
#class MAFFT
|
246
|
+
# autoload :Report, 'bio/appl/mafft/report'
|
247
|
+
#end
|
248
|
+
|
249
|
+
autoload :Tcoffee, 'bio/appl/tcoffee'
|
250
|
+
autoload :Muscle, 'bio/appl/muscle'
|
251
|
+
autoload :Probcons, 'bio/appl/probcons'
|
252
|
+
|
253
|
+
autoload :Sim4, 'bio/appl/sim4'
|
254
|
+
## below are described in bio/appl/sim4.rb
|
255
|
+
#class Sim4
|
256
|
+
# autoload :Report, 'bio/appl/sim4/report'
|
257
|
+
#end
|
258
|
+
|
259
|
+
autoload :Spidey, 'bio/appl/spidey/report'
|
260
|
+
autoload :Blat, 'bio/appl/blat/report'
|
261
|
+
|
262
|
+
module GCG
|
263
|
+
autoload :Msf, 'bio/appl/gcg/msf'
|
264
|
+
autoload :Seq, 'bio/appl/gcg/seq'
|
265
|
+
end
|
266
|
+
|
267
|
+
module Phylip
|
268
|
+
autoload :PhylipFormat, 'bio/appl/phylip/alignment'
|
269
|
+
autoload :DistanceMatrix, 'bio/appl/phylip/distance_matrix'
|
270
|
+
end
|
271
|
+
|
272
|
+
autoload :Iprscan, 'bio/appl/iprscan/report'
|
273
|
+
|
274
|
+
autoload :PAML, 'bio/appl/paml/common'
|
275
|
+
## below are described in bio/appl/paml/common.rb
|
276
|
+
# module PAML
|
277
|
+
# autoload :Codeml, 'bio/appl/paml/codeml'
|
278
|
+
# autoload :Baseml, 'bio/appl/paml/baseml'
|
279
|
+
# autoload :Yn00, 'bio/appl/paml/yn00'
|
280
|
+
# end
|
281
|
+
|
282
|
+
### Utilities
|
283
|
+
|
284
|
+
autoload :SiRNA, 'bio/util/sirna'
|
285
|
+
autoload :ColorScheme, 'bio/util/color_scheme'
|
286
|
+
autoload :ContingencyTable, 'bio/util/contingency_table'
|
287
|
+
autoload :RestrictionEnzyme, 'bio/util/restriction_enzyme'
|
288
|
+
|
289
|
+
### Service libraries
|
290
|
+
autoload :Command, 'bio/command'
|
291
|
+
|
292
|
+
### Provide BioRuby shell 'command' also as 'Bio.command' (like ChemRuby)
|
293
|
+
|
294
|
+
# Fallback dispatcher that exposes BioRuby shell commands as Bio.xxx.
#
# When an unknown method is called on Bio, this loads 'bio/shell',
# extends Bio with Bio::Shell and makes Bio::Shell's private instance
# methods public on Bio. If Bio then responds to the requested name,
# the call is re-dispatched with the original arguments and block.
#
# ---
# *Arguments*:
# * _args_: the missing method's name followed by the call's arguments
# * _block_: optional block, forwarded to the shell command
# *Returns*:: the return value of the dispatched shell command
# *Raises*:: NoMethodError (a subclass of NameError) naming the missing
#            method when the shell does not provide it either
def self.method_missing(*args, &block)
  require 'bio/shell'
  extend Bio::Shell
  public_class_method(*Bio::Shell.private_instance_methods)
  if Bio.respond_to?(args.first)
    Bio.send(*args, &block)
  else
    # Delegate to the default implementation so the error carries the
    # method name and receiver instead of a bare, message-less NameError.
    super
  end
end
|
304
|
+
|
305
|
+
end
|
306
|
+
|
@@ -0,0 +1,2518 @@
|
|
1
|
+
#
|
2
|
+
# = bio/alignment.rb - multiple alignment of sequences
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2003, 2005, 2006
|
5
|
+
# GOTO Naohisa <ng@bioruby.org>
|
6
|
+
#
|
7
|
+
# License:: The Ruby License
|
8
|
+
#
|
9
|
+
# $Id: alignment.rb,v 1.24 2007/12/26 14:08:02 ngoto Exp $
|
10
|
+
#
|
11
|
+
# = About Bio::Alignment
|
12
|
+
#
|
13
|
+
# Please refer to the documentation of the Bio::Alignment module.
|
14
|
+
#
|
15
|
+
# = References
|
16
|
+
#
|
17
|
+
# * Bio::Align::AlignI class of the BioPerl.
|
18
|
+
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/Align/AlignI.html
|
19
|
+
#
|
20
|
+
# * Bio::SimpleAlign class of the BioPerl.
|
21
|
+
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
|
22
|
+
#
|
23
|
+
|
24
|
+
require 'tempfile'
|
25
|
+
require 'bio/command'
|
26
|
+
require 'bio/sequence'
|
27
|
+
|
28
|
+
#---
|
29
|
+
# (depends on autoload)
|
30
|
+
#require 'bio/appl/gcg/seq'
|
31
|
+
#+++
|
32
|
+
|
33
|
+
module Bio
|
34
|
+
|
35
|
+
#
|
36
|
+
# = About Bio::Alignment
|
37
|
+
#
|
38
|
+
# Bio::Alignment is a namespace of classes/modules for multiple sequence
|
39
|
+
# alignment.
|
40
|
+
#
|
41
|
+
# = Multiple alignment container classes
|
42
|
+
#
|
43
|
+
# == Bio::Alignment::OriginalAlignment
|
44
|
+
#
|
45
|
+
# == Bio::Alignment::SequenceArray
|
46
|
+
#
|
47
|
+
# == Bio::Alignment::SequenceHash
|
48
|
+
#
|
49
|
+
# = Bio::Alignment::Site
|
50
|
+
#
|
51
|
+
# = Modules
|
52
|
+
#
|
53
|
+
# == Bio::Alignment::EnumerableExtension
|
54
|
+
#
|
55
|
+
# Mix-in for classes that include Enumerable.
|
56
|
+
#
|
57
|
+
# == Bio::Alignment::ArrayExtension
|
58
|
+
#
|
59
|
+
# Mix-in for Array or Array-like classes.
|
60
|
+
#
|
61
|
+
# == Bio::Alignment::HashExtension
|
62
|
+
#
|
63
|
+
# Mix-in for Hash or Hash-like classes.
|
64
|
+
#
|
65
|
+
# == Bio::Alignment::SiteMethods
|
66
|
+
#
|
67
|
+
# == Bio::Alignment::PropertyMethods
|
68
|
+
#
|
69
|
+
# = Bio::Alignment::GAP
|
70
|
+
#
|
71
|
+
# = Compatibility from older BioRuby
|
72
|
+
#
|
73
|
+
module Alignment
|
74
|
+
|
75
|
+
autoload :MultiFastaFormat, 'bio/appl/mafft/report'
|
76
|
+
|
77
|
+
# Bio::Alignment::PropertyMethods holds the per-object settings shared by
# alignments and sites: the gap pattern, the gap character, the
# missing-site character and the sequence class.
# Each reader falls back to a default when the object has no value
# (or a nil/false value) of its own.
module PropertyMethods
  # Default pattern for detecting gaps: any non-alphabetic character.
  GAP_REGEXP = /[^a-zA-Z]/
  # Default gap character.
  GAP_CHAR = '-'.freeze
  # Default character for a missing or unknown site.
  MISSING_CHAR = '?'.freeze

  # Writers for the gap pattern, the gap character, the missing-site
  # character and the sequence class (which must be String or a
  # derivative of it).
  attr_writer :gap_regexp, :gap_char, :missing_char, :seqclass

  # Returns true if <em>s</em> matches gap_regexp, false otherwise.
  # <em>s</em> must be a String which contains a single character.
  def is_gap?(s)
    !!(gap_regexp =~ s)
  end

  # Returns the regular expression used for checking gaps.
  def gap_regexp
    custom = defined?(@gap_regexp) ? @gap_regexp : nil
    custom || GAP_REGEXP
  end

  # Returns the gap character.
  def gap_char
    custom = defined?(@gap_char) ? @gap_char : nil
    custom || GAP_CHAR
  end

  # Returns the character used when a site is missing or unknown.
  def missing_char
    custom = defined?(@missing_char) ? @missing_char : nil
    custom || MISSING_CHAR
  end

  # Returns the class of the sequence.
  # If @seqclass (which can be set by the 'seqclass=' method) holds a
  # true value, returns it. Otherwise, returns String.
  def seqclass
    custom = defined?(@seqclass) ? @seqclass : nil
    custom || String
  end

  # Returns the properties that have been assigned on this object as a
  # hash keyed by :gap_regexp, :gap_char, :missing_char and :seqclass.
  # Properties that were never assigned are left out.
  def get_all_property
    props = {}
    [ :gap_regexp, :gap_char, :missing_char, :seqclass ].each do |key|
      ivar = "@#{key}"
      props[key] = instance_variable_get(ivar) if instance_variable_defined?(ivar)
    end
    props
  end

  # Sets properties from the given hash and returns self.
  # <em>hash</em> would be a return value of the
  # <tt>get_all_property</tt> method; keys not present are left untouched.
  def set_all_property(hash)
    [ :gap_regexp, :gap_char, :missing_char, :seqclass ].each do |key|
      instance_variable_set("@#{key}", hash[key]) if hash.has_key?(key)
    end
    self
  end
end #module PropertyMethods
|
156
|
+
|
157
|
+
# Bio::Alignment::SiteMethods is a set of methods for
|
158
|
+
# Bio::Alignment::Site.
|
159
|
+
# It can also be used for extending an array of single-letter strings.
|
160
|
+
module SiteMethods
|
161
|
+
include PropertyMethods
|
162
|
+
|
163
|
+
# Returns true if at least one element of the site is a gap
# (as judged by is_gap?). Otherwise, returns false.
def has_gap?
  any? { |x| is_gap?(x) }
end
|
167
|
+
|
168
|
+
# Removes gaps from the site. (destructive method)
# nil elements are dropped as well.
# Returns self if at least one gap was found, nil otherwise.
def remove_gaps!
  found_gap = false
  reject! do |x|
    gap = is_gap?(x)
    found_gap = true if gap
    gap || x.nil?
  end
  found_gap ? self : nil
end
|
177
|
+
|
178
|
+
# Returns the consensus character of the site.
#
# The most frequent element wins; ties are resolved in favor of the
# element that appears first in the site. It is returned only when its
# count is at least <tt>size * threshold</tt>.
#
# threshold:: required fraction of the site (0.0 <= threshold <= 1.0
#             is expected, default 1.0)
#
# If a consensus is found, returns a single-letter string.
# If not (or if the site is empty), returns nil.
def consensus_string(threshold = 1.0)
  return nil if size <= 0
  distinct = uniq # keeps first-occurrence order, used for tie-breaking
  return self[0] if distinct.size == 1
  counts = Hash.new(0)
  each { |x| counts[x] += 1 }
  top = distinct[0]
  # strict '>' keeps the earliest element when counts are equal
  distinct.each { |x| top = x if counts[x] > counts[top] }
  (size * threshold <= counts[top]) ? top : nil
end
|
198
|
+
|
199
|
+
# IUPAC nucleotide groups. Internal use only.
# The first element of each group is the code returned for the group;
# the remaining elements are the bases and codes it covers.
IUPAC_NUC = [
  %w( t u ),
  %w( m a c ),
  %w( r a g ),
  %w( w a t u ),
  %w( s c g ),
  %w( y c t u ),
  %w( k g t u ),
  %w( v a c g m r s ),
  %w( h a c t u m w y ),
  %w( d a g t u r w k ),
  %w( b c g t u s y k ),
  %w( n a c g t u m r w s y k v h d b )
]

# Returns an IUPAC consensus base for the site (case-insensitive,
# result in lower case).
#
# * A site holding a single kind of base returns that base
#   ('u' is reported as 't'); a single unknown character returns nil.
# * Otherwise the code of the first IUPAC_NUC group covering all the
#   bases is returned.
# * An empty site, or a site containing characters outside every
#   group (for example a gap), returns nil.
def consensus_iupac
  bases = collect { |x| x.downcase }.uniq
  # an empty set would be "covered" by the first group; report no consensus
  return nil if bases.empty?
  if bases.size == 1
    base = bases[0]
    return 't' if base == 'u'
    return base if %w( a c g t ).include?(base)
    return IUPAC_NUC.any? { |grp| grp[0] == base } ? base : nil
  end
  group = IUPAC_NUC.find { |grp| (bases - grp).empty? }
  group ? group[0] : nil
end
|
235
|
+
|
236
|
+
# Table of strongly conserved amino-acid groups.
#
# The values of the tables are taken from BioPerl
# (Bio/SimpleAlign.pm in BioPerl 1.0), and the BioPerl document says
# that they are taken from the Clustalw documentation:
# these are all the positively scoring groups that occur in the
# Gonnet Pam250 matrix. The strong and weak groups are
# defined as strong score >0.5 and weak score =<0.5 respectively.
#
StrongConservationGroups = %w(STA NEQK NHQK NDEQ QHRK MILV MILF
  HY FYW).collect { |x| x.split('').sort }

# Table of weakly conserved amino-acid groups.
#
# Please refer to the StrongConservationGroups document
# for the origin of the table.
WeakConservationGroups = %w(CSA ATV SAG STNK STPA SGND SNDEQK
  NDEQHK NEQHRK FVLIM HFY).collect { |x| x.split('').sort }

# Returns the match-line character for the site.
# This is the amino-acid version.
#
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:strong_match_char] ==> strong match  default: ':'
#   opt[:weak_match_char]   ==> weak match    default: '.'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
#
# A site containing a gap is always a mismatch. The gap test is done
# with the site's own has_gap?, so a customized gap_regexp is honored.
# Residues are compared case-insensitively.
def match_line_amino(opt = {})
  match_char    = (opt[:match_line_char]   || '*')
  strong_char   = (opt[:strong_match_char] || ':')
  weak_char     = (opt[:weak_match_char]   || '.')
  mismatch_char = (opt[:mismatch_char]     || ' ')
  return mismatch_char if has_gap?
  residues = collect { |c| c.upcase }.uniq
  if residues.size == 1
    match_char
  elsif StrongConservationGroups.find { |grp| (residues - grp).empty? }
    strong_char
  elsif WeakConservationGroups.find { |grp| (residues - grp).empty? }
    weak_char
  else
    mismatch_char
  end
end
|
281
|
+
|
282
|
+
# Returns the match-line character for the site.
# This is the nucleic-acid version.
#
#   opt[:match_line_char] ==> 100% equal  default: '*'
#   opt[:mismatch_char]   ==> mismatch    default: ' '
#
# A site containing a gap is always a mismatch. The gap test is done
# with the site's own has_gap?, so a customized gap_regexp is honored.
# Bases are compared case-insensitively.
def match_line_nuc(opt = {})
  match_char    = (opt[:match_line_char] || '*')
  mismatch_char = (opt[:mismatch_char]   || ' ')
  return mismatch_char if has_gap?
  bases = collect { |c| c.upcase }.uniq
  (bases.size == 1) ? match_char : mismatch_char
end
|
299
|
+
end #module SiteMethods
|
300
|
+
|
301
|
+
# Bio::Alignment::Site stores the bases or amino acids found at one
# site of the alignment.
# It would store multiple String objects of length 1.
# Please refer to the documents of Array and SiteMethods for methods.
class Site < Array
  include SiteMethods
end #class Site
|
308
|
+
|
309
|
+
# The module Bio::Alignment::EnumerableExtension is a set of useful
|
310
|
+
# methods for multiple sequence alignment.
|
311
|
+
# It can be included by any classes or can be extended to any objects.
|
312
|
+
# The classes or objects must have methods defined in Enumerable,
|
313
|
+
# and must have the <tt>each</tt> method
|
314
|
+
# which iterates over each sequence (or string) and yields
|
315
|
+
# a sequence (or string) object.
|
316
|
+
#
|
317
|
+
# Optionally, if <tt>each_seq</tt> method is defined,
|
318
|
+
# which iterates over each sequence (or string) and yields
|
319
|
+
# each sequence (or string) object, it is used instead of <tt>each</tt>.
|
320
|
+
#
|
321
|
+
# Note that the <tt>each</tt> or <tt>each_seq</tt> method would be
|
322
|
+
# called multiple times.
|
323
|
+
# This means that the module is not suitable for IO objects.
|
324
|
+
# In addition, <tt>break</tt> would be used in the given block and
|
325
|
+
# destructive methods would be used to the sequences.
|
326
|
+
#
|
327
|
+
# For Array or Hash objects, you'd better using
|
328
|
+
# ArrayExtension or HashExtension modules, respectively.
|
329
|
+
# They would have built-in <tt>each_seq</tt> method and/or
|
330
|
+
# some methods would be redefined.
|
331
|
+
#
|
332
|
+
module EnumerableExtension
|
333
|
+
include PropertyMethods
|
334
|
+
|
335
|
+
# Iterates over each sequence, yielding it to the given block.
# It simply delegates to <tt>each</tt>.
#
# Classes or objects for which <tt>each</tt> does not yield sequences
# would redefine this method.
def each_seq(&block) #:yields: seq
  each(&block)
end
|
343
|
+
|
344
|
+
# Returns the class of the sequence.
# If the instance variable @seqclass (which can be set by the
# 'seqclass=' method) holds a true value, simply returns it.
# Otherwise, returns the class of the first non-nil/false sequence.
# If no such sequence is found, returns String.
def seqclass
  return @seqclass if defined?(@seqclass) && @seqclass
  each_seq { |s| return s.class if s }
  String
end
|
363
|
+
|
364
|
+
# Returns the alignment length, i.e. the length of the longest
# sequence in the alignment (0 when there are no sequences).
def alignment_length
  longest = 0
  each_seq do |seq|
    n = seq.length
    longest = n if n > longest
  end
  longest
end
|
374
|
+
alias seq_length alignment_length
|
375
|
+
|
376
|
+
# Gets the site at the given position.
# Returns a Bio::Alignment::Site object holding, for every sequence,
# the one-character slice <tt>seq[position, 1]</tt>.
#
# Sequences that have nothing at the position contribute a gap
# (<tt>seqclass.new(gap_char)</tt>), so an out-of-range position gives
# a site of which all are gaps.
#
# It is a private method.
# The only difference from the public alignment_site method is that
# it does not do <tt>set_all_property(get_all_property)</tt>.
def _alignment_site(position)
  column = Site.new
  each_seq do |seq|
    residue = seq[position, 1]
    residue = seqclass.new(gap_char) if residue.to_s.empty?
    column << residue
  end
  column
end
|
396
|
+
private :_alignment_site
|
397
|
+
|
398
|
+
# Gets the site at the given position.
# Returns a Bio::Alignment::Site object that carries the properties
# currently set on this alignment (see get_all_property).
#
# If the position is out of range, it returns the site
# of which all are gaps.
def alignment_site(position)
  _alignment_site(position).tap do |site|
    site.set_all_property(get_all_property)
  end
end
|
408
|
+
|
409
|
+
# Iterates over each site of the alignment, from position 0 up to
# alignment_length - 1.
# It yields a Bio::Alignment::Site object (which inherits Array)
# carrying this alignment's properties.
# It returns self.
def each_site
  props = get_all_property
  alignment_length.times do |pos|
    site = _alignment_site(pos)
    site.set_all_property(props)
    yield site
  end
  self
end
|
421
|
+
|
422
|
+
# Iterates over sites of the alignment, with the start and stop
# positions and the step specified.
# It yields a Bio::Alignment::Site object (which inherits Array)
# carrying this alignment's properties.
# It returns self.
# It is the same as
# <tt>start.step(stop, step) { |i| yield alignment_site(i) }</tt>.
def each_site_step(start, stop, step = 1)
  props = get_all_property
  start.step(stop, step) do |pos|
    site = _alignment_site(pos)
    site.set_all_property(props)
    yield site
  end
  self
end
|
437
|
+
|
438
|
+
# Iterates over each sequence, collects the results of running the
# block, and returns them as a new alignment: a
# Bio::Alignment::SequenceArray object carrying this alignment's
# properties.
#
# Note that it would be redefined if you want to change
# the return value's class.
#
def alignment_collect
  result = SequenceArray.new
  result.set_all_property(get_all_property)
  each_seq { |seq| result << yield(seq) }
  result
end
|
453
|
+
|
454
|
+
# Returns the specified range of the alignment.
# For each sequence, the '[]' method (it may be String#[])
# is executed with the given arguments, and the results are returned
# as a new alignment (built by alignment_collect).
#
# Unlike the alignment_slice method, a sequence for which the range is
# out of range contributes an empty sequence
# (<tt>seqclass.new('')</tt>) instead of nil.
#
# If you want to change the return value's class, you should redefine
# the alignment_collect method.
#
def alignment_window(*arg)
  alignment_collect { |seq| seq[*arg] || seqclass.new('') }
end
|
471
|
+
alias window alignment_window
|
472
|
+
|
473
|
+
# Iterates over each sliding window of the alignment.
#
# window_size:: the size of the sliding window
# step_size::   the step of each sliding (default 1); a negative value
#               slides from the tail of the alignment toward the head
#
# It yields an alignment (the return value of alignment_window)
# for each sliding window.
# It returns the remainder of the alignment that no window covered:
# the terminal end for a non-negative step, the head for a negative one.
# If the alignment is shorter than window_size, nothing is yielded and
# the whole alignment is returned as the remainder.
# If window_size is smaller than 0, it returns nil.
def each_window(window_size, step_size = 1)
  return nil if window_size < 0
  last_start = alignment_length - window_size
  # No window fits at all: everything is remainder.
  return alignment_window(0..-1) if last_start < 0
  if step_size >= 0
    last_step = nil
    0.step(last_start, step_size) do |i|
      yield alignment_window(i, window_size)
      last_step = i
    end
    alignment_window((last_step + window_size)..-1)
  else
    i = last_start
    while i >= 0
      yield alignment_window(i, window_size)
      i += step_size
    end
    # (i - step_size) is the start of the last window that was yielded
    alignment_window(0...(i - step_size))
  end
end
|
499
|
+
|
500
|
+
# Iterates over each site of the alignment, collects the results of
# running the block, and returns them as an array.
# It yields a Bio::Alignment::Site object.
def collect_each_site
  results = []
  each_site { |site| results << yield(site) }
  results
end
|
510
|
+
|
511
|
+
# Helper method for calculating a consensus sequence.
# It iterates over each site of the alignment and yields a
# Bio::Alignment::Site object.
# The results of running the block (String objects are expected; a
# nil/false result is replaced by the missing character) are joined
# into a string, which is returned.
#
#   opt[:missing_char] ==> character for sites without a result
#                          default: missing_char of the alignment
#   opt[:gap_mode] ==>  0 -- gaps are regarded as normal characters
#                       1 -- a site within gaps is regarded as a gap
#                      -1 -- gaps are eliminated from consensus calculation
#       default: 0
#
# Raises RuntimeError when :gap_mode has any other value.
def consensus_each_site(opt = {})
  mchar = (opt[:missing_char] || self.missing_char)
  gap_mode = opt[:gap_mode]
  unless [ 0, nil, 1, -1 ].include?(gap_mode)
    raise ':gap_mode must be 0, 1 or -1'
  end
  chars = collect_each_site do |site|
    if 1 == gap_mode
      site.has_gap? ? gap_char : (yield(site) || mchar)
    elsif -1 == gap_mode
      site.remove_gaps!
      site.empty? ? gap_char : (yield(site) || mchar)
    else
      yield(site) || mchar
    end
  end
  chars.join('')
end
|
544
|
+
|
545
|
+
# Returns the consensus string of the alignment, built from the
# per-site <tt>consensus_string(threshold)</tt> results.
# 0.0 <= threshold <= 1.0 is expected.
#
# It resembles the BioPerl's AlignI::consensus_string method.
#
# Please refer to the consensus_each_site method for opt.
#
def consensus_string(threshold = 1.0, opt = {})
  consensus_each_site(opt) { |site| site.consensus_string(threshold) }
end
|
557
|
+
|
558
|
+
# Returns the IUPAC consensus string of the alignment
# of nucleic-acid sequences, built from the per-site
# <tt>consensus_iupac</tt> results.
#
# It resembles the BioPerl's AlignI::consensus_iupac method.
#
# Please refer to the consensus_each_site method for opt.
#
def consensus_iupac(opt = {})
  consensus_each_site(opt) { |site| site.consensus_iupac }
end
|
570
|
+
|
571
|
+
# Returns the match line string of the alignment
# of amino-acid sequences: the per-site <tt>match_line_amino(opt)</tt>
# characters joined together.
#
# It resembles the BioPerl's AlignI::match_line method.
#
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:strong_match_char] ==> strong match  default: ':'
#   opt[:weak_match_char]   ==> weak match    default: '.'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
#
def match_line_amino(opt = {})
  marks = collect_each_site { |site| site.match_line_amino(opt) }
  marks.join('')
end
|
589
|
+
|
590
|
+
# Returns the match line string of the alignment
# of nucleic-acid sequences: the per-site <tt>match_line_nuc(opt)</tt>
# characters joined together.
#
# It resembles the BioPerl's AlignI::match_line method.
#
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
#
def match_line_nuc(opt = {})
  marks = collect_each_site { |site| site.match_line_nuc(opt) }
  marks.join('')
end
|
606
|
+
|
607
|
+
# Returns the match line string of the alignment
# of nucleic- or amino-acid sequences.
#
# The type of the sequences is taken from opt[:type] when it is one of
# :aa, :na, :dna or :rna. Otherwise it is determined from seqclass
# (Bio::Sequence::AA or Bio::Sequence::NA), and failing that the
# alignment is treated as amino acids if any sequence contains one of
# the letters E, F, I, L, P or Q (case-insensitive), and as nucleic
# acids if none does.
#
# It resembles the BioPerl's AlignI::match_line method.
#
#   opt[:type] ==> :na or :aa (or determined by sequence class)
#   opt[:match_line_char]   ==> 100% equal    default: '*'
#   opt[:strong_match_char] ==> strong match  default: ':'
#   opt[:weak_match_char]   ==> weak match    default: '.'
#   opt[:mismatch_char]     ==> mismatch      default: ' '
#     :strong_ and :weak_match_char are used only in amino mode (:aa)
#
def match_line(opt = {})
  amino =
    case opt[:type]
    when :aa
      true
    when :na, :dna, :rna
      false
    else
      klass = seqclass
      if klass == Bio::Sequence::AA
        true
      elsif klass == Bio::Sequence::NA
        false
      else
        # letters that never occur in nucleotide (IUPAC) sequences
        protein_letter_seen = false
        each_seq do |x|
          next unless /[EFILPQ]/i =~ x
          protein_letter_seen = true
          break
        end
        protein_letter_seen
      end
    end
  amino ? match_line_amino(opt) : match_line_nuc(opt)
end
|
651
|
+
|
652
|
+
# This is the BioPerl's AlignI::match like method.
#
# In the second to last sequences, replaces a character with
# match_char (default: '.') when it is equal to the first sequence's
# character at the same position and that character is not a gap.
# It returns self.
#
# Note that it is a destructive method.
#
# For Hash, please use it carefully because
# the order of the sequences is inconstant.
#
def convert_match(match_char = '.')
  #(BioPerl) AlignI::match like method
  len = alignment_length
  reference = nil
  each_seq do |seq|
    if reference
      len.times do |i|
        next unless seq[i] && reference[i] == seq[i]
        next if is_gap?(reference[i..i])
        seq[i..i] = match_char
      end
    else
      # the first sequence is the reference and is left untouched
      reference = seq
    end
  end
  self
end
|
679
|
+
|
680
|
+
# This is the BioPerl's AlignI::unmatch like method.
#
# In the second to last sequences, replaces match_char
# (default: '.') with the first sequence's character at the same
# position. Where the first sequence has no character at that
# position, match_char is left as it is.
# It returns self.
#
# Note that it is a destructive method.
#
# For Hash, please use it carefully because
# the order of the sequences is inconstant.
#
def convert_unmatch(match_char = '.')
  #(BioPerl) AlignI::unmatch like method
  len = alignment_length
  reference = nil
  each_seq do |seq|
    if reference
      len.times do |i|
        next unless seq[i..i] == match_char
        original = reference[i, 1]
        # reference[i, 1] is "" right at the end of the first sequence;
        # assigning it would delete the character and shift the sequence
        next if original.nil? || original.empty?
        seq[i..i] = original
      end
    else
      # the first sequence is the reference and is left untouched
      reference = seq
    end
  end
  self
end
|
707
|
+
|
708
|
+
# Appends gap characters to the tail of every sequence that is
# shorter than the alignment length, so that all sequences end up
# with the same length.
# It returns self.
#
# Note that it is a destructive method.
def alignment_normalize!
  #(original)
  len = alignment_length
  each_seq do |seq|
    shortfall = len - seq.length
    seq << (gap_char * shortfall) if shortfall > 0
  end
  self
end
|
720
|
+
alias normalize! alignment_normalize!
|
721
|
+
|
722
|
+
# Removes excess gaps in the tail of the sequences, i.e. the trailing
# sites that consist of gaps only.
# If it removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def alignment_rstrip!
  #(String-like)
  len = alignment_length
  newlen = len
  # walk the sites from the tail until one has a non-gap character
  each_site_step(len - 1, 0, -1) do |site|
    site.remove_gaps!
    break unless site.empty?
    newlen -= 1
  end
  return nil if newlen >= len
  each_seq do |seq|
    seq[newlen..-1] = '' if seq.length > newlen
  end
  self
end
|
745
|
+
alias rstrip! alignment_rstrip!
|
746
|
+
|
747
|
+
# Removes excess gaps in the head of the sequences, i.e. the leading
# sites that consist of gaps only.
# If it removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def alignment_lstrip!
  #(String-like)
  leading = 0
  # count the sites from the head until one has a non-gap character
  each_site do |site|
    site.remove_gaps!
    break unless site.empty?
    leading += 1
  end
  return nil if leading <= 0
  each_seq { |seq| seq[0, leading] = '' }
  self
end
|
767
|
+
alias lstrip! alignment_lstrip!
|
768
|
+
|
769
|
+
# Removes excess gaps at both ends of the sequences: first the tail
# (alignment_rstrip!), then the head (alignment_lstrip!).
# If it removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def alignment_strip!
  #(String-like)
  tail_result = alignment_rstrip!
  head_result = alignment_lstrip!
  tail_result || head_result
end
|
780
|
+
alias strip! alignment_strip!
|
781
|
+
|
782
|
+
# Completely removes ALL gaps (everything matching gap_regexp)
# from the sequences.
# If it removes nothing, returns nil.
# Otherwise, returns self.
#
# Note that it is a destructive method.
def remove_all_gaps!
  changed = false
  each_seq do |seq|
    # gsub! returns nil when the sequence had no gaps
    changed = true if seq.gsub!(gap_regexp, '')
  end
  changed ? self : nil
end
|
795
|
+
|
796
|
+
# Returns the specified range of the alignment.
# For each sequence, the 'slice' method (it may be String#slice,
# which is the same as String#[]) is executed with the given
# arguments, and the results are returned as a new alignment
# (built by alignment_collect).
#
# Unlike the alignment_window method, the result alignment
# might contain nil.
#
# If you want to change the return value's class, you should redefine
# the alignment_collect method.
#
def alignment_slice(*arg)
  #(String-like)
  #(BioPerl) AlignI::slice like method
  alignment_collect { |seq| seq.slice(*arg) }
end
|
814
|
+
alias slice alignment_slice
|
815
|
+
|
816
|
+
# For each sequence, the 'subseq' method (Bio::Sequence::Common#subseq
# is expected) is executed with the given arguments, and the results
# are returned as a new alignment (built by alignment_collect).
#
# All sequences in the alignment are expected to be kind of
# Bio::Sequence::NA or Bio::Sequence::AA objects.
#
# Unlike the alignment_window method, the result alignment
# might contain nil.
#
# If you want to change the return value's class, you should redefine
# the alignment_collect method.
#
def alignment_subseq(*arg)
  #(original)
  alignment_collect { |seq| seq.subseq(*arg) }
end
|
835
|
+
alias subseq alignment_subseq
|
836
|
+
|
837
|
+
# Concatenates the given alignment: the n-th sequence of
# <em>align</em> is appended (with the 'concat' method) to the n-th
# sequence of self. Sequences without a counterpart on either side,
# and nil sequences, are skipped.
#
# <em>align</em> must have the <tt>each_seq</tt>
# or <tt>each</tt> method; <tt>each_seq</tt> is preferred when
# <em>align</em> responds to it.
#
# Returns self.
#
# Note that it is a destructive method.
#
# For Hash, please use it carefully because
# the order of the sequences is inconstant and
# key information is completely ignored.
#
def alignment_concat(align)
  targets = []
  each_seq { |s| targets << s }
  i = 0
  append = lambda do |seq|
    targets[i].concat(seq) if targets[i] && seq
    i += 1
  end
  # Ask which iterator is available instead of rescuing NoMethodError,
  # so that errors raised by the iterator itself are never swallowed.
  if align.respond_to?(:each_seq)
    align.each_seq { |seq| append.call(seq) }
  else
    align.each { |seq| append.call(seq) }
  end
  self
end
|
870
|
+
end #module EnumerableExtension
|
871
|
+
|
872
|
+
module Output
|
873
|
+
# Returns the alignment as text in the given format by dispatching to
# the matching <tt>output_<em>format</em></tt> method; the remaining
# arguments are passed through unchanged.
#
# format:: one of :clustal, :fasta, :phylip, :phylipnon, :msf
#          or :molphy
#
# Raises RuntimeError for any other format.
def output(format, *arg)
  known_formats = [ :clustal, :fasta, :phylip, :phylipnon, :msf, :molphy ]
  unless known_formats.include?(format)
    raise "Unknown format: #{format.inspect}"
  end
  __send__("output_#{format}", *arg)
end
|
891
|
+
|
892
|
+
# Checks whether there are names that would be the same in
# ClustalW format.
#
# Names are compared by their first whitespace- or NUL-delimited word,
# truncated to <em>len</em> characters, with each of the characters
# <tt>: ; , ( )</tt> replaced by '_'.
#
# array:: names of the sequences (array of string)
# len:: length to check (default:30)
#
# Returns a sorted array with the indexes of all names that collide
# with another name, or false if every name is distinct.
def __clustal_have_same_name?(array, len = 30)
  keys = array.collect do |k|
    k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/[\:\;\,\(\)]/, '_')
  end
  # group the indexes by comparison key
  groups = Hash.new { |h, key| h[key] = [] }
  keys.each_with_index { |key, i| groups[key] << i }
  dupidx = groups.values.select { |idxs| idxs.size > 1 }.flatten.sort
  dupidx.empty? ? false : dupidx
end
|
923
|
+
private :__clustal_have_same_name?
|
924
|
+
|
925
|
+
# Changes sequence names if there are conflicting names
# for ClustalW format.
#
# CR, LF and NUL characters in the names are always replaced by a
# space. If names still conflict (see __clustal_have_same_name?),
# whitespace within the first <em>len</em> characters of the
# conflicting names is replaced by '_'. If that is not enough, every
# name is prefixed with its index and '_'.
#
# array:: names of the sequences (array of string)
# len:: length to check (default:30)
#
# Returns a new array of names; <em>array</em> is not modified.
def __clustal_avoid_same_name(array, len = 30)
  names = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
  dupidx = __clustal_have_same_name?(names, len)
  return names unless dupidx

  # First attempt: make the whole head of each conflicting name count,
  # starting from the sanitized name so CR/LF/NUL do not come back.
  dupidx.each do |i|
    s = names[i]
    names[i] = s[0, len].to_s.gsub(/\s/, '_') + s[len..-1].to_s
  end
  return names unless __clustal_have_same_name?(names, len)

  # Last resort: the index prefix makes every name unique.
  names.each_with_index { |s, i| names[i] = "#{i}_#{s}" }
  names
end
|
957
|
+
private :__clustal_avoid_same_name
|
958
|
+
|
959
|
+
# Generates ClustalW-formatted text and returns it as a string.
#
# seqs:: sequences (must be an alignment object)
# names:: names of the sequences
# options:: options
#
# Options read by this method:
#   :replace_space   ==> replace whitespace in names with '_'
#   :escape          ==> replace : ; , ( ) in names with '_'
#                        (enabled unless set to false/nil)
#   :split           ==> keep only the first word of each name
#                        (enabled unless set to false/nil)
#   :avoid_same_name ==> rename conflicting names
#                        (enabled unless set to false/nil)
#   :gap_char        ==> gap character in the output  default: '-'
#   :type            ==> sequence type passed to match_line
#                        (matched against /protein/i, /aa/i, /na/i)
#   :match_line      ==> match line to use instead of seqs.match_line;
#                        the given string is not modified
#   :case            ==> matched against /lower/i or /upper/i
#   :seqnos          ==> append the running residue count to each line
def __clustal_formatter(seqs, names, options = {})
  #(original)
  # An option of this kind is on when absent or set to a true value.
  enabled = lambda { |key| !options.has_key?(key) || options[key] }

  aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
  len = seqs.seq_length

  sn = names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if enabled.call(:escape)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if enabled.call(:split)
  sn = __clustal_avoid_same_name(sn) if enabled.call(:avoid_same_name)

  # long names get a wider name column and narrower sequence chunks
  if sn.any? { |x| x.length > 10 }
    seqwidth = 50
    namewidth = 30
  else
    seqwidth = 60
    namewidth = 10
  end
  sep = ' ' * 6
  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] || '-')

  mopt = case options[:type].to_s
         when /protein/i, /aa/i
           { :type => :aa }
         when /na/i
           { :type => :na }
         else
           {}
         end
  # dup: the line is padded and split in place below, and it may be a
  # (possibly frozen) string owned by the caller
  mline = (options[:match_line] || seqs.match_line(mopt)).dup

  aseqs = []
  seqs.each_seq do |s|
    aseqs << s.to_s.gsub(seqs.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  aseqs << mline
  aseqs.collect! do |s|
    snx = sn.shift # nil for the match line, which has no name
    head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth] + sep
    padding = len - s.length
    s << (gchar * padding) if padding > 0
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    if options[:seqnos] && snx
      i = 0
      a.each do |x|
        x.chomp!
        l = x.tr(gchar, '').length
        i += l
        x.concat(l > 0 ? " #{i}\n" : "\n")
      end
    end
    a.collect { |x| head + x }
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  lines.times do
    aln << "\n"
    aseqs.each { |a| aln << a.shift }
  end
  aln.join('')
end
|
1039
|
+
private :__clustal_formatter
|
1040
|
+
|
1041
|
+
# Generates ClustalW-formatted text for this alignment.
#
# The alignment itself and its sequence_names are handed to the
# private __clustal_formatter together with the given options.
# options:: Hash of formatting options, passed through unchanged
#
# Returns the value produced by __clustal_formatter.
def output_clustal(options = {})
  names = self.sequence_names
  __clustal_formatter(self, names, options)
end
|
1048
|
+
|
1049
|
+
# Deprecated name for output_clustal.
# Prints a deprecation message with warn, then forwards every argument
# to output_clustal and returns its result.
#---
#alias to_clustal output_clustal
#+++
def to_clustal(*arg)
  message = "to_clustal is deprecated. Please use output_clustal."
  warn(message)
  output_clustal(*arg)
end
|
1057
|
+
|
1058
|
+
# Generates fasta format text and returns a string.
#
# options:: Hash of options
#           :width           => characters per sequence line (default 70).
#                               A value of 0 or less disables wrapping.
#           :avoid_same_name => when true, names are passed through
#                               __clustal_avoid_same_name(names, 30);
#                               otherwise CR, LF and NUL in each name
#                               are replaced by a space.
#
# Sequences are visited with each_seq, like the other output_* methods,
# so containers whose +collect+ yields something other than the sequence
# (for example a Hash, which yields key/value pairs) are formatted
# correctly.
def output_fasta(options={})
  #(original)
  width = (options[:width] or 70)
  if options[:avoid_same_name] then
    names = __clustal_avoid_same_name(self.sequence_names, 30)
  else
    names = self.sequence_names.collect do |k|
      k.to_s.gsub(/[\r\n\x00]/, ' ')
    end
  end
  # nil means "do not wrap": the sequence is written on a single line
  wrap = (width and width > 0) ? Regexp.new(".{1,#{width}}") : nil
  fasta = []
  i = 0
  self.each_seq do |s|
    body = s.to_s
    body = wrap ? body.gsub(wrap, "\\0\n") : body + "\n"
    fasta << ">#{names[i]}\n" << body
    i += 1
  end
  fasta.join('')
end
|
1080
|
+
|
1081
|
+
# Generates phylip interleaved alignment format as a string.
#
# The header and the per-sequence line chunks come from
# __output_phylip_common. For every block one chunk of each sequence is
# emitted, followed by an empty line; the empty line after the final
# block is dropped.
# options:: Hash of options, forwarded to __output_phylip_common
def output_phylip(options = {})
  out, chunks, nblocks = __output_phylip_common(options)
  nblocks.times do
    chunks.each { |rows| out.push(rows.shift) }
    out.push("\n")
  end
  out.pop if out.last == "\n"
  out.join('')
end
|
1091
|
+
|
1092
|
+
# Generates Phylip3.2 (old) non-interleaved format as a string.
#
# The result is the header line from __output_phylip_common followed by
# all line chunks of the first sequence, then all chunks of the second
# sequence, and so on.
# options:: Hash of options, forwarded to __output_phylip_common
def output_phylipnon(options = {})
  parts = __output_phylip_common(options)
  header = parts[0].first
  header + parts[1].join('')
end
|
1097
|
+
|
1098
|
+
# Common routine for interleaved/non-interleaved phylip format.
#
# options:: Hash of options
#           :replace_space   => replace whitespace in names by "_"
#                               (off unless given)
#           :escape          => replace : ; , ( ) in names by "_"
#                               (on unless explicitly disabled)
#           :split           => keep only the first whitespace-separated
#                               word of each name (on unless disabled)
#           :avoid_same_name => pass names through
#                               __clustal_avoid_same_name(names, 10)
#                               (on unless disabled)
#           :width           => residues per line, rounded down to a
#                               multiple of 10 (default 60, minimum 10)
#           :gap_char        => gap character (default "-")
#           :case            => "lower" or "upper" to change letter case
#
# Returns [ aln, aseqs, lines ] where aln is an array holding the header
# line, aseqs is an array (one entry per sequence) of arrays of formatted
# lines, and lines is the number of blocks.
def __output_phylip_common(options = {})
  len = self.alignment_length
  aln = [ " #{self.number_of_sequences} #{len}\n" ]
  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  if options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sn, 10)
  end

  namewidth = 10
  seqwidth = (options[:width] or 60)
  seqwidth = seqwidth.div(10) * 10
  # A width below 10 would round down to 0 (or below) and make the
  # block count below divide by zero, so fall back to one group of 10.
  seqwidth = 10 if seqwidth < 10
  # every group of 10 residues gets one leading space, hence 11 per group
  seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gchar = (options[:gap_char] or '-')

  aseqs = Array.new(self.number_of_sequences).clear
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  aseqs.collect! do |s|
    snx = sn.shift
    # name left-justified and cut to exactly namewidth characters
    head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth]
    head2 = ' ' * namewidth
    # pad short sequences with gaps up to the alignment length
    s << (gchar * (len - s.length))
    s.gsub!(/(.{1,10})/n, " \\1")
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    # a is empty for a zero-length alignment; to_s avoids String + nil
    head += a.shift.to_s
    ret = a.collect { |x| head2 + x }
    ret.unshift(head)
    ret
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  [ aln, aseqs, lines ]
end
|
1149
|
+
|
1150
|
+
# Generates Molphy alignment format text as a string.
#
# The text starts with a "<number of sequences> <alignment length>" line;
# each sequence then follows as its name on one line and the residues
# wrapped to :width characters per line.
#
# options:: Hash of options
#           :replace_space   => replace whitespace in names by "_"
#                               (off unless given)
#           :escape, :split, :avoid_same_name =>
#                               name clean-up steps, each applied unless
#                               the option is present and false
#           :width           => residues per line (default 60)
#           :gap_char        => gap character (default "-")
#           :case            => "lower" or "upper" to change letter case
def output_molphy(options = {})
  len = self.alignment_length
  header = "#{self.number_of_sequences} #{len}\n"

  # true when the option is absent or set to a true value
  enabled = lambda { |key| !options.has_key?(key) or options[key] }

  names = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  names.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  names.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if enabled.call(:escape)
  names.collect! { |x| x.split(/\s/)[0].to_s } if enabled.call(:split)
  names = __clustal_avoid_same_name(names, 30) if enabled.call(:avoid_same_name)

  width = (options[:width] or 60)
  chunk = Regexp.new("(.{1,#{width}})")
  gap = (options[:gap_char] or '-')

  rows = Array.new(len).clear
  self.each_seq { |s| rows << s.to_s.gsub(self.gap_regexp, gap) }
  case options[:case].to_s
  when /lower/i then rows.each { |s| s.downcase! }
  when /upper/i then rows.each { |s| s.upcase! }
  end

  text = [ header ]
  rows.each do |s|
    # pad short sequences with gaps, then wrap
    s << (gap * (len - s.length))
    s.gsub!(chunk, "\\1\n")
    text << (names.shift + "\n" + s)
  end
  text.join('')
end
|
1191
|
+
|
1192
|
+
# Generates msf formatted text as a string.
#
# options:: Hash of options
#           :avoid_same_name => pass names through
#                               __clustal_avoid_same_name (on unless
#                               disabled); when disabled, CR, LF and NUL
#                               in names are replaced by a space
#           :replace_space, :escape, :split =>
#                               name clean-up steps, each applied unless
#                               the option is present and false
#           :gap_char        => gap character (default ".")
#           :padding_char    => character replacing leading and trailing
#                               gaps (default "~")
#           :case            => "lower" or "upper" (default: upper case)
#           :type            => matched against /protein/i, /aa/i and
#                               /na/i to force the sequence type
#           :entry_id        => name printed in the header line
#           :time            => object responding to strftime
#                               (default Time.now)
#
# When :type is not given and seqclass is neither Bio::Sequence::AA nor
# Bio::Sequence::NA, the type is guessed: the alignment is treated as
# nucleotide only if every formatted sequence consists solely of
# a, c, g, t, gap and padding characters; otherwise protein is assumed.
def output_msf(options = {})
  len = self.seq_length

  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(self.sequence_names)
  else
    sn = self.sequence_names.collect do |x|
      x.to_s.gsub(/[\r\n\x00]/, ' ')
    end
  end
  if !options.has_key?(:replace_space) or options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end

  seqwidth = 50
  # max is nil when there are no names; treat that as width 0
  namewidth = [31, (sn.collect { |x| x.length }.max or 0) ].min
  sep = ' ' * 2

  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] or '.')
  pchar = (options[:padding_char] or '~')

  aseqs = Array.new(self.number_of_sequences).clear
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  aseqs.each do |s|
    # leading gaps become padding; trailing gaps are removed and the
    # sequence is then padded up to len
    s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
    s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
    s << (pchar * (len - s.length))
  end

  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  else #default upcase
    aseqs.each { |s| s.upcase! }
  end

  case options[:type].to_s
  when /protein/i, /aa/i
    amino = true
  when /na/i
    amino = false
  else
    if seqclass == Bio::Sequence::AA then
      amino = true
    elsif seqclass == Bio::Sequence::NA then
      amino = false
    else
      # if we can't determine, we assume as protein.
      # Count down once for every sequence that looks like nucleotide.
      # The whole string is tested, and gap/padding characters are
      # allowed because they are already part of the strings here.
      na_regexp =
        /\A[acgt#{Regexp.escape(gchar)}#{Regexp.escape(pchar)}]+\z/i
      amino = aseqs.size
      aseqs.each { |x| amino -= 1 if na_regexp =~ x }
      amino = false if amino <= 0
    end
  end

  seq_type = (amino ? 'P' : 'N')

  fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf')
  dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M')

  sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
  #sums = aseqs.collect { |s| 0 }
  sum = 0; sums.each { |x| sum += x }; sum %= 10000
  msf =
    [
      "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
      "\n",
      "\n",
      " #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n",
      "\n"
    ]

  sn.each do |snx|
    msf << ' Name: ' +
      sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
      " Len: #{len} Check: #{sums.shift} Weight: 1.00\n"
  end
  msf << "\n//\n"

  aseqs.collect! do |s|
    snx = sn.shift
    # names are right-justified in the alignment body
    head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    a.collect { |x| head + x }
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  i = 1
  lines.times do
    msf << "\n"
    # ruler line showing the first and last column of this block
    n_l = i
    n_r = [ i + seqwidth - 1, len ].min
    if n_l != n_r then
      w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
      msf << (' ' * namewidth + sep + n_l.to_s +
              ' ' * w + n_r.to_s + "\n")
    else
      msf << (' ' * namewidth + sep + n_l.to_s + "\n")
    end
    aseqs.each { |a| msf << a.shift }
    i += seqwidth
  end
  msf << "\n"
  msf.join('')
end
|
1308
|
+
|
1309
|
+
end #module Output
|
1310
|
+
|
1311
|
+
module EnumerableExtension
  include Output

  # Returns number of sequences in this alignment, counted by walking
  # through each_seq.
  def number_of_sequences
    count = 0
    self.each_seq { |_seq| count += 1 }
    count
  end

  # Returns an array of sequence names.
  # The order of the names must be the same as
  # the order of <tt>each_seq</tt>.
  #
  # This default implementation uses the positions
  # 0, 1, ..., number_of_sequences - 1 as names.
  def sequence_names
    Array.new(self.number_of_sequences) { |i| i }
  end
end #module EnumerableExtension
|
1328
|
+
|
1329
|
+
# Bio::Alignment::ArrayExtension provides the alignment methods for
# array-like containers of sequences.
# It can extend individual array objects or be included in classes
# that inherit Array.
# (Including it directly in Array is possible but not recommended.)
#
# All methods of EnumerableExtension are available through it;
# see EnumerableExtension for their usage.
module ArrayExtension
  include EnumerableExtension

  # Iterates over each sequence, yielding the sequence.
  #
  # It simply forwards the block to +each+.
  def each_seq(&block) #:yields: seq
    each(&block)
  end

  # Returns number of sequences in this alignment (the value of +size+).
  def number_of_sequences
    self.size
  end
end #module ArrayExtension
|
1353
|
+
|
1354
|
+
# Bio::Alignment::HashExtension provides the alignment methods for
# hash-like containers that map a name to a sequence.
# It can extend individual hash objects or be included in classes
# that inherit Hash.
# (Including it directly in Hash is possible but not recommended.)
#
# All methods of EnumerableExtension are available through it;
# see EnumerableExtension for their usage.
#
# alignment_collect is redefined here to build a SequenceHash, so
# methods relying on it return a SequenceHash instead of a
# SequenceArray.
#
# Because the order of the objects in a hash is inconstant,
# methods that depend strictly on the order of the sequences
# might not work correctly,
# e.g. EnumerableExtension#convert_match and #convert_unmatch.
module HashExtension
  include EnumerableExtension

  # Iterates over each sequence, yielding the sequence.
  #
  # Sequences are looked up with self[key] for every key of each_key.
  def each_seq #:yields: seq
    #each_value(&block)
    each_key { |name| yield self[name] }
  end

  # Runs the block for each sequence and stores the results, under the
  # original keys, in a new Bio::Alignment::SequenceHash which is
  # returned. The properties of self (get_all_property) are copied to
  # the new object.
  #
  # Redefine this method to change the class of the return value.
  #
  def alignment_collect
    collected = SequenceHash.new
    collected.set_all_property(get_all_property)
    each_pair { |name, seq| collected.store(name, yield(seq)) }
    collected
  end

  # Concatenates the given alignment.
  # If <em>align</em> responds to <tt>each_pair</tt> (a Hash or
  # SequenceHash), sequences of same keys are concatenated.
  # Otherwise <em>align</em> must have <tt>each_seq</tt>
  # or <tt>each</tt>, and its sequences are appended by position to
  # the sequences in <tt>values</tt>.
  #
  # Returns self.
  #
  # Note that it is a destructive method.
  #
  def alignment_concat(align)
    # becomes true once an iteration has started; from then on a
    # NoMethodError/ArgumentError is a real error and is re-raised
    started = nil
    begin
      align.each_pair do |name, seq|
        started = true
        target = self[name]
        target.concat(seq) if target
      end
      return self
    rescue NoMethodError, ArgumentError => err
      raise err if started
    end
    targets = values
    pos = 0
    begin
      align.each_seq do |seq|
        started = true
        targets[pos].concat(seq) if targets[pos] and seq
        pos += 1
      end
      return self
    rescue NoMethodError, ArgumentError => err
      raise err if started
    end
    align.each do |seq|
      targets[pos].concat(seq) if targets[pos] and seq
      pos += 1
    end
    self
  end

  # Returns number of sequences in this alignment (the value of +size+).
  def number_of_sequences
    self.size
  end

  # Returns an array of sequence names (the hash keys).
  # The order of the names must be the same as
  # the order of <tt>each_seq</tt>.
  def sequence_names
    self.keys
  end
end #module HashExtension
|
1454
|
+
|
1455
|
+
# Bio::Alignment::SequenceArray is an Array-based container class for
# a multiple sequence alignment.
# It is a plain subclass of Array that additionally includes
# ArrayExtension (and, through it, EnumerableExtension).
class SequenceArray < Array
  include ArrayExtension
end #class SequenceArray
|
1463
|
+
|
1464
|
+
# Bio::Alignment::SequenceHash is a Hash-based container class for
# a multiple sequence alignment.
# It is a plain subclass of Hash that additionally includes
# HashExtension (and, through it, EnumerableExtension).
class SequenceHash < Hash
  include HashExtension
end #class SequenceHash
|
1472
|
+
|
1473
|
+
# Bio::Alignment::OriginalPrivate is a set of private helper methods
# for Bio::Alignment::OriginalAlignment.
# Both helpers are also callable on the module itself (module_function).
module OriginalPrivate

  # Gets the sequence from given object.
  #
  # Bio::Sequence::NA and Bio::Sequence::AA objects are returned as is.
  # For anything else the methods seq, naseq and aaseq are tried in
  # this order (NameError and ArgumentError are ignored) and the first
  # true result is returned. If none gives a result, obj itself is
  # returned.
  def extract_seq(obj)
    return obj if obj.is_a?(Bio::Sequence::NA) or obj.is_a?(Bio::Sequence::AA)
    found = nil
    [ :seq, :naseq, :aaseq ].each do |m|
      found = begin
                obj.send(m)
              rescue NameError, ArgumentError
                nil
              end
      break if found
    end
    found ? found : obj
  end
  module_function :extract_seq

  # Gets the name or the definition of the sequence from given object.
  #
  # The methods definition and entry_id are tried in this order
  # (NameError and ArgumentError are ignored) and the first true result
  # is returned. Returns nil when neither gives a result.
  def extract_key(obj)
    name = nil
    [ :definition, :entry_id ].each do |m|
      name = begin
               obj.send(m)
             rescue NameError, ArgumentError
               nil
             end
      break if name
    end
    name
  end
  module_function :extract_key
end #module OriginalPrivate
|
1512
|
+
|
1513
|
+
# Bio::Alignment::OriginalAlignment is
|
1514
|
+
# the BioRuby original multiple sequence alignment container class.
|
1515
|
+
# It includes HashExtension.
|
1516
|
+
#
|
1517
|
+
# It is recommended only to use methods defined in EnumerableExtension
|
1518
|
+
# (and the each_seq method).
|
1519
|
+
# The method only defined in this class might be obsoleted in the future.
|
1520
|
+
#
|
1521
|
+
class OriginalAlignment
|
1522
|
+
|
1523
|
+
include Enumerable
|
1524
|
+
include HashExtension
|
1525
|
+
include OriginalPrivate
|
1526
|
+
|
1527
|
+
# Reads the given files and creates a new alignment object.
#
# Every file is opened with Bio::FlatFile.open(nil, filename) and the
# flatfile object is passed to add_sequences of the new alignment.
#
# It will be obsoleted.
def self.readfiles(*files)
  require 'bio/io/flatfile'
  alignment = self.new
  files.each do |filename|
    Bio::FlatFile.open(nil, filename) { |flatfile| alignment.add_sequences(flatfile) }
  end
  alignment
end
|
1540
|
+
|
1541
|
+
# Creates a new alignment object from given arguments.
# The arguments are collected into one array, which is passed to new.
#
# It will be obsoleted.
def self.new2(*arg)
  sequences = arg
  self.new(sequences)
end
|
1547
|
+
|
1548
|
+
# Creates a new alignment object.
# <em>seqs</em> may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
# an alignment object.
#
# The object starts with an empty key list and an empty
# key => sequence hash; <em>seqs</em> is then added with add_sequences.
def initialize(seqs = [])
  @keys = []
  @seqs = {}
  self.add_sequences(seqs)
end
|
1558
|
+
|
1559
|
+
# Returns true if <em>x</em> is an instance of the class of self
# (or of a subclass) and its to_hash equals the to_hash of self.
# Otherwise, returns false.
def ==(x)
  #(original)
  x.is_a?(self.class) and self.to_hash == x.to_hash
end
|
1569
|
+
|
1570
|
+
# Converts to a hash.
# Returns the internal key => sequence hash itself (not a copy).
def to_hash
  #(Hash-like)
  @seqs
end
|
1575
|
+
|
1576
|
+
# Adds sequences to the alignment.
# <em>seqs</em> may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
# an alignment object.
#
# If a block is given, it is called with every element of
# <em>seqs</em> and must return [ sequence, key ].
# Without a block, an object of the same class as self is copied pair
# by pair; any other object responding to each_pair supplies the keys
# while the sequences are taken with extract_seq; everything else is
# iterated with each, using extract_seq and extract_key.
#
# Returns self.
def add_sequences(seqs)
  if block_given? then
    seqs.each do |item|
      sequence, name = yield item
      self.store(name, sequence)
    end
    return self
  end

  if seqs.is_a?(self.class) then
    seqs.each_pair { |name, sequence| self.store(name, sequence) }
  elsif seqs.respond_to?(:each_pair)
    seqs.each_pair { |name, item| self.store(name, extract_seq(item)) }
  else
    seqs.each do |item|
      sequence = extract_seq(item)
      name = extract_key(item)
      self.store(name, sequence)
    end
  end
  self
end
|
1607
|
+
|
1608
|
+
# identifiers (or definitions or names) of the sequences
|
1609
|
+
attr_reader :keys
|
1610
|
+
|
1611
|
+
# Stores a sequence with the name, without any duplicate check.
# key:: name of the sequence
# seq:: sequence
#
# The key appended to the key list is the key object taken from a
# one-element hash built from the pair, and that same hash is merged
# into the sequence hash.
#
# Returns seq.
def __store__(key, seq)
  #(Hash-like)
  pair = { key => seq }
  stored_key = pair.keys.first
  @keys.push(stored_key)
  @seqs.update(pair)
  seq
end
|
1621
|
+
|
1622
|
+
# Stores a sequence with <em>key</em>
# (name or definition of the sequence).
# Unlike <tt>__store__</tt> method, the method doesn't allow
# same keys.
# If the key is already used (or no key is given), the given key is
# discarded and an unused integer serial number is used as the key
# instead; the existing entry is preserved.
#
# Returns the key under which the sequence was actually stored.
def store(key, seq)
  #(Hash-like) returns key instead of seq
  key = nil if @seqs.has_key?(key)
  unless key then
    @serial = 0 unless defined?(@serial)
    # start searching from the current number of sequences
    @serial = @seqs.size if @seqs.size > @serial
    @serial += 1 while @seqs.has_key?(@serial)
    key = @serial
  end
  self.__store__(key, seq)
  key
end
|
1648
|
+
|
1649
|
+
# Reconstructs internal data structure.
# (Like Hash#rehash)
#
# The sequence hash is rehashed and the key list is rebuilt from it:
# listed keys that still exist keep their order, listed keys missing
# from the hash are dropped, and hash keys not yet listed are appended.
#
# Returns self.
def rehash
  @seqs.rehash
  remaining = @seqs.keys
  @keys.collect! { |k| remaining.delete(k) }
  @keys.compact!
  @keys.concat(remaining)
  self
end
|
1662
|
+
|
1663
|
+
# Prepends seq (with key) to the front of the alignment.
# (Like Array#unshift)
#
# The sequence is added with store, then the last entry of the key
# list is moved to the front.
# Returns the key that was moved to the front.
def unshift(key, seq)
  #(Array-like)
  self.store(key, seq)
  newest = @keys.pop
  @keys.insert(0, newest)
  newest
end
|
1672
|
+
|
1673
|
+
# Removes the first sequence in the alignment and
# returns [ key, seq ].
# Returns nil when the key list yields no (true) key.
def shift
  first_key = @keys.shift
  return nil unless first_key
  [ first_key, @seqs.delete(first_key) ]
end
|
1684
|
+
|
1685
|
+
# Gets the <em>n</em>-th sequence, i.e. the sequence stored under the
# <em>n</em>-th entry of the key list.
# If not found, returns nil.
def order(n)
  #(original)
  key = @keys[n]
  @seqs[key]
end
|
1691
|
+
|
1692
|
+
# Removes the sequence whose key is <em>key</em> from both the key
# list and the sequence hash.
# Returns the removed sequence.
# If not found, returns nil.
def delete(key)
  #(Hash-like)
  @keys.delete(key)
  removed = @seqs.delete(key)
  removed
end
|
1700
|
+
|
1701
|
+
# Returns sequences as an array, in the order of the key list.
# (Like Hash#values)
def values
  #(Hash-like)
  @seqs.values_at(*@keys)
end
|
1706
|
+
|
1707
|
+
# Adds a sequence without key.
# The sequence is passed to store with a nil key, so the key is
# determined automatically.
# Returns self.
def <<(seq)
  #(Array-like)
  no_key = nil
  self.store(no_key, seq)
  self
end
|
1714
|
+
|
1715
|
+
# Gets a sequence by key. (Like Hash#[])
# All arguments are forwarded to the [] method of the internal hash.
def [](*arg)
  #(Hash-like)
  sequences = @seqs
  sequences[*arg]
end
|
1720
|
+
|
1721
|
+
# Number of sequences in the alignment
# (the number of entries in the internal hash).
def size
  #(Hash&Array-like)
  @seqs.length
end
|
1726
|
+
alias number_of_sequences size
|
1727
|
+
|
1728
|
+
# If the key exists in the internal hash, returns true.
# Otherwise, returns false.
# (Like Hash#has_key?)
def has_key?(key)
  #(Hash-like)
  @seqs.key?(key)
end
|
1734
|
+
|
1735
|
+
# Iterates over each sequence, in the order of the key list.
# (Like Array#each)
def each
  #(Array-like)
  @keys.each { |name| yield(@seqs[name]) }
end
|
1743
|
+
alias each_seq each
|
1744
|
+
|
1745
|
+
# Iterates over each key and sequence, in the order of the key list.
# (Like Hash#each_pair)
def each_pair
  #(Hash-like)
  @keys.each { |name| yield(name, @seqs[name]) }
end
|
1753
|
+
|
1754
|
+
# Iterates over each sequence in the order of the key list, replacing
# the sequence with the value returned by the block.
def collect!
  #(Array-like)
  @keys.each do |name|
    @seqs[name] = yield(@seqs[name])
  end
end
|
1762
|
+
|
1763
|
+
###--
|
1764
|
+
### note that 'collect' and 'to_a' is defined in Enumerable
|
1765
|
+
###
|
1766
|
+
### instance-variable-related methods
|
1767
|
+
###++
|
1768
|
+
|
1769
|
+
# Creates new alignment. Internal use only.
#
# A new object of the same class as self is built from the given
# arguments, and the properties of self (get_all_property) are copied
# to it with set_all_property.
def new(*arg)
  created = self.class.new(*arg)
  created.set_all_property(get_all_property)
  created
end
|
1775
|
+
protected :new
|
1776
|
+
|
1777
|
+
# Duplicates the alignment by passing self to the (instance) method
# new, and returns the result.
def dup
  #(Hash-like)
  source = self
  self.new(source)
end
|
1782
|
+
|
1783
|
+
#--
|
1784
|
+
# methods below should not access instance variables
|
1785
|
+
#++
|
1786
|
+
|
1787
|
+
# Merges given alignment(s) and returns a new alignment.
# A copy of self is made with new(self) and merge! is called on the
# copy with all arguments; self is left untouched by this method.
def merge(*other)
  #(Hash-like)
  merged = self.new(self)
  merged.merge!(*other)
  merged
end
|
1794
|
+
|
1795
|
+
# Merges given alignment(s) into self.
# Note that it is destructive method.
#
# Every argument is iterated with each_pair. A key not yet present is
# added with store. For a key that already exists:
# * with a block, the block is called with (key, old sequence,
#   new sequence) and its result replaces the stored sequence;
# * without a block, the old entry is deleted and the new sequence
#   is stored.
#
# Returns self.
def merge!(*other)
  #(Hash-like)
  other.each do |aln|
    aln.each_pair do |k, s|
      if !self.has_key?(k) then
        self.store(k, s)
      elsif block_given? then
        resolved = yield k, self[k], s
        self.to_hash.store(k, resolved)
      else
        self.delete(k)
        self.store(k, s)
      end
    end
  end
  self
end
|
1820
|
+
|
1821
|
+
# Returns the key for a given sequence. If not found, returns nil.
#
# Pairs are visited with each_pair and the key of the first matching
# sequence is returned. Sequences of the same class as <em>seq</em>
# are compared with ==; otherwise the to_s values are compared.
def index(seq)
  #(Hash-like)
  self.each_pair do |k, s|
    if s.class == seq.class then
      return k if s == seq
    else
      return k if s.to_s == seq.to_s
    end
  end
  # no sequence matched; do not report the last key visited
  nil
end
|
1836
|
+
|
1837
|
+
# Sequences in the alignment are duplicated: each one is replaced by
# seqclass.new(sequence).
# If keys are given to the argument, only sequences of given keys are
# duplicated; keys that do not exist are ignored. A duplicated entry
# is deleted and stored again under the same key.
#
# Returns self.
#
# It will be obsoleted.
def isolate(*arg)
  #(original)
  if arg.size == 0 then
    self.collect! do |s|
      seqclass.new(s)
    end
  else
    arg.each do |k|
      if self.has_key?(k) then
        s = self.delete(k)
        self.store(k, seqclass.new(s))
      end
    end
  end
  self
end
|
1858
|
+
|
1859
|
+
# Runs the block for each sequence and stores the results, under the
# original keys, in a new alignment of the same class as self.
# The properties of self (get_all_property) are copied to the new
# alignment, which is returned.
#
# The method name 'collect_align' will be obsoleted.
# Please use 'alignment_collect' instead.
def alignment_collect
  #(original)
  result = self.class.new
  result.set_all_property(get_all_property)
  self.each_pair { |name, seq| result.store(name, yield(seq)) }
  result
end
|
1873
|
+
alias collect_align alignment_collect
|
1874
|
+
|
1875
|
+
# Removes empty sequences or nil in the alignment.
# (Like Array#compact!)
#
# Returns an array of the removed keys, or nil if nothing was removed.
def compact!
  #(Array-like)
  doomed = []
  self.each_pair do |k, s|
    doomed << k unless s and !s.empty?
  end
  # delete after the iteration so the alignment is not changed while
  # it is being walked
  doomed.each { |k| self.delete(k) }
  doomed.empty? ? nil : doomed
end
|
1890
|
+
|
1891
|
+
# Removes empty sequences or nil and returns new alignment.
# (Like Array#compact)
#
# compact! is applied to a copy made with dup.
def compact
  #(Array-like)
  copy = self.dup
  copy.compact!
  copy
end
|
1899
|
+
|
1900
|
+
# Adds a sequence to the alignment.
#
# Bio::Sequence::NA and Bio::Sequence::AA objects are stored as they
# are. For any other object the sequence is taken with extract_seq
# and, when no key is given, the key with extract_key.
#
# Returns the return value of store.
#
# It resembles BioPerl's AlignI::add_seq method.
def add_seq(seq, key = nil)
  #(BioPerl) AlignI::add_seq like method
  bio_sequence = (seq.is_a?(Bio::Sequence::NA) or seq.is_a?(Bio::Sequence::AA))
  unless bio_sequence
    source = seq
    seq = extract_seq(source)
    key = extract_key(source) unless key
  end
  self.store(key, seq)
end
|
1914
|
+
|
1915
|
+
# Removes given sequence from the alignment.
# The key is looked up with index and the entry removed with delete.
# Returns removed sequence. If nothing removed, returns nil.
#
# It resembles BioPerl's AlignI::remove_seq.
def remove_seq(seq)
  #(BioPerl) AlignI::remove_seq like method
  k = self.index(seq)
  k ? self.delete(k) : nil
end
|
1927
|
+
|
1928
|
+
# Removes sequences from the alignment by given keys.
# Returns an alignment object (created with new) consisting of the
# removed sequences. Keys whose sequence is nil or false, or that do
# not exist, are skipped.
#
# It resembles BioPerl's AlignI::purge method.
def purge(*arg)
  #(BioPerl) AlignI::purge like method
  removed = self.new
  arg.each { |k| removed.store(k, self.delete(k)) if self[k] }
  removed
end
|
1942
|
+
|
1943
|
+
# If block is given, it acts like Array#select (Enumerable#select).
# Returns a new alignment containing all sequences of the alignment
# for which return value of given block is not false nor nil.
# Any arguments are ignored in this case.
#
# If no block is given, it acts like the BioPerl's AlignI::select.
# Returns a new alignment containing sequences of given keys
# (keys without a sequence are skipped).
#
# The BioPerl's AlignI::select-like action will be obsoleted.
def select(*arg)
  #(original)
  na = self.new
  if block_given? then
    # 'arg' is ignored
    # nearly same action as Array#select (Enumerable#select)
    # each_pair must be given the block directly: it yields
    # unconditionally and does not return an enumerator
    self.each_pair do |k, s|
      na.store(k, s) if yield(s)
    end
  else
    # BioPerl's AlignI::select like function
    arg.each do |k|
      if s = self[k] then
        na.store(k, s)
      end
    end
  end
  na
end
|
1970
|
+
|
1971
|
+
# The method name <tt>slice</tt> will be obsoleted.
|
1972
|
+
# Please use <tt>alignment_slice</tt> instead.
|
1973
|
+
alias slice alignment_slice
|
1974
|
+
|
1975
|
+
# The method name <tt>subseq</tt> will be obsoleted.
|
1976
|
+
# Please use <tt>alignment_subseq</tt> instead.
|
1977
|
+
alias subseq alignment_subseq
|
1978
|
+
|
1979
|
+
# Not-destructive version of alignment_normalize!.
# alignment_normalize! is applied to a copy made with dup.
# Returns a new alignment.
def normalize
  #(original)
  copy = self.dup
  copy.alignment_normalize!
  copy
end
|
1987
|
+
|
1988
|
+
# Not-destructive version of alignment_rstrip!.
# A copy is made with dup and its sequences are duplicated with
# isolate before alignment_rstrip! is applied to the copy.
# Returns a new alignment.
def rstrip
  #(String-like)
  copy = self.dup
  copy.isolate
  copy.alignment_rstrip!
  copy
end
|
1997
|
+
|
1998
|
+
# Not-destructive version of alignment_lstrip!.
# A copy is made with dup and its sequences are duplicated with
# isolate before alignment_lstrip! is applied to the copy.
# Returns a new alignment.
def lstrip
  #(String-like)
  copy = self.dup
  copy.isolate
  copy.alignment_lstrip!
  copy
end
|
2007
|
+
|
2008
|
+
# Not-destructive version of alignment_strip!.
# A copy is made with dup and its sequences are duplicated with
# isolate before alignment_strip! is applied to the copy.
# Returns a new alignment.
def strip
  #(String-like)
  copy = self.dup
  copy.isolate
  copy.alignment_strip!
  copy
end
|
2017
|
+
|
2018
|
+
# Not-destructive version of remove_all_gaps!.
# A copy is made with dup and its sequences are duplicated with
# isolate before remove_all_gaps! is applied to the copy.
# Returns a new alignment.
#
# The method name 'remove_gap' will be obsoleted.
# Please use 'remove_all_gaps' instead.
def remove_all_gaps
  #(original)
  copy = self.dup
  copy.isolate
  copy.remove_all_gaps!
  copy
end
|
2030
|
+
|
2031
|
+
# Appends a string or an alignment to the receiver.
#
# aln:: when it responds to +to_str+, it is appended (with <tt><<</tt>)
#       to every element yielded by +each+ and self is returned;
#       otherwise the call is delegated to <tt>alignment_concat(aln)</tt>
#       and that method's result is returned.
#
# Note that the method will be obsoleted.
# Please use <tt>each_seq { |s| s << str }</tt> for concatenating
# a string and
# <tt>alignment_concat(aln)</tt> for concatenating an alignment.
def concat(aln)
  #(String-like)
  # Anything that is not string-like is handled by alignment_concat.
  return alignment_concat(aln) unless aln.respond_to?(:to_str) #aln.is_a?(String)

  each { |seq| seq << aln }
  self
end
|
2049
|
+
|
2050
|
+
# Replaces the specified region of every sequence with +aln+.
# aln:: String or Bio::Alignment object (or any object with +each+)
# arg:: same format as String#slice
#
# * string-like +aln+ (responds to +to_str+): assigned into the region
#   of every element yielded by +each+.
# * +aln+ of the receiver's class: each sequence of +aln+ is assigned
#   into the region of the receiver's sequence stored under the same key.
# * anything else: the n-th element yielded by <tt>aln.each</tt> is
#   assigned into the region of <tt>order(n)</tt>.
#
# Returns self.
#
# It will be obsoleted.
def replace_slice(aln, *arg)
  #(original)
  if aln.respond_to?(:to_str) #aln.is_a?(String)
    each { |seq| seq[*arg] = aln }
  elsif aln.is_a?(self.class)
    aln.each_pair { |key, seq| self[key][*arg] = seq }
  else
    # Positional pairing; only +each+ is required of aln.
    idx = -1
    aln.each { |seq| order(idx += 1)[*arg] = seq }
  end
  self
end
|
2074
|
+
|
2075
|
+
# Performs multiple alignment by using external program.
#
# factory:: object whose <tt>query(alignment)</tt> returns a report
#           responding to +alignment+.
#
# The sequences are handed to the factory under integer keys
# (0, 1, 2, ...); the result is looked up under the string form of
# those keys and stored back under the receiver's original keys.
# Raises RuntimeError when the result lacks one of the expected keys.
# Returns the object built by <tt>self.new</tt>, filled with the
# aligned sequences.
def do_align(factory)
  indexed = self.class.new
  size.times { |i| indexed.store(i, order(i)) }
  aligned = factory.query(indexed).alignment
  indexed.keys.each do |k|
    raise 'alignment result is inconsistent with input data' unless aligned[k.to_s]
  end
  # NOTE(review): self.new is an instance-level constructor defined
  # outside this chunk; it is intentionally not self.class.new.
  result = self.new
  indexed.keys.each { |k| result.store(self.keys[k], aligned[k.to_s]) }
  result
end
|
2092
|
+
|
2093
|
+
# Converts to fasta format and returns an array of strings,
# one <tt>">name\nsequence\n"</tt> entry per sequence.
#
# Arguments (all optional):
# * an Integer line width as the first argument, and/or
# * an options hash: <tt>:width</tt> (used only when no Integer width
#   was given) and <tt>:avoid_same_name</tt> (names are then produced
#   by __clustal_avoid_same_name with a length of 30).
#
# Without <tt>:avoid_same_name</tt>, CR, LF and NUL characters in the
# keys are replaced by spaces. Without a width, each sequence is
# written on a single line.
#
# It will be obsoleted.
def to_fasta_array(*arg)
  #(original)
  width = arg.shift if arg[0].is_a?(Integer)
  options = arg.shift || {}
  width ||= options[:width]
  names =
    if options[:avoid_same_name]
      __clustal_avoid_same_name(self.keys, 30)
    else
      self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    end
  self.collect do |s|
    header = ">#{names.shift}\n"
    body =
      if width
        # Append a newline after every chunk of up to +width+ characters.
        s.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
      else
        s.to_s + "\n"
      end
    header + body
  end
end
|
2119
|
+
|
2120
|
+
# Converts to fasta format and returns an array of Bio::FastaFormat
# objects, one per entry produced by to_fasta_array (the arguments
# are passed through unchanged).
#
# It will be obsoleted.
def to_fastaformat_array(*arg)
  #(original)
  require 'bio/db/fasta' # loaded lazily, only when this method is used
  entries = self.to_fasta_array(*arg)
  entries.collect! { |text| Bio::FastaFormat.new(text) }
  entries
end
|
2132
|
+
|
2133
|
+
# Converts to fasta format and returns a single string: the entries
# of to_fasta_array (called with the same arguments) joined together.
# A deprecation warning is emitted on every call.
#
# The specification of the argument will be changed.
#
# Note: <tt>to_fasta</tt> is deprecated.
# Please use <tt>output_fasta</tt> instead.
def to_fasta(*arg)
  #(original)
  warn "to_fasta is deprecated. Please use output_fasta."
  entries = to_fasta_array(*arg)
  entries.join('')
end
|
2144
|
+
|
2145
|
+
# The method name <tt>consensus</tt> will be obsoleted.
|
2146
|
+
# Please use <tt>consensus_string</tt> instead.
|
2147
|
+
alias consensus consensus_string
|
2148
|
+
end #class OriginalAlignment
|
2149
|
+
|
2150
|
+
# Bio::Alignment::GAP is a set of class methods for
# gap-related position translation.
module GAP
  # Translates a position in the gapped sequence into the position in
  # the sequence with gaps removed.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position with gaps
  #<em>gap_regexp</em>:: regular expression to specify gaps
  def ungapped_pos(seq, pos, gap_regexp)
    # Number of non-gap characters in seq[0..pos].
    residues = seq[0..pos].gsub(gap_regexp, '').length
    # Convert the count into a 0-based index (0 stays 0).
    residues > 0 ? residues - 1 : residues
  end
  module_function :ungapped_pos

  # Translates a position in the gap-free sequence into the position
  # in the gapped sequence.
  #<em>seq</em>:: sequence
  #<em>pos</em>:: position without gaps; values at or beyond the
  #               ungapped length are clamped to that length, and
  #               negative values are counted from that length.
  #<em>gap_regexp</em>:: regular expression to specify gaps
  def gapped_pos(seq, pos, gap_regexp)
    ungapped_len = seq.gsub(gap_regexp, '').length
    pos = ungapped_len if pos >= ungapped_len
    pos = ungapped_len + pos if pos < 0

    index = 0
    remaining = pos + 1 # non-gap characters still to be passed
    until remaining <= 0 || index >= seq.length
      # Advance by +remaining+ columns and subtract the non-gap
      # characters actually found in that window.
      found = seq[index, remaining].gsub(gap_regexp, '').length
      index += remaining
      remaining -= found
    end
    index > 0 ? index - 1 : index
  end
  module_function :gapped_pos
end # module GAP
|
2185
|
+
|
2186
|
+
# Shortcut for OriginalAlignment.new: all arguments are passed through
# and the result of that call is returned.
# Please refer to the documentation of OriginalAlignment.new.
def self.new(*arg)
  OriginalAlignment.new(*arg)
end
|
2191
|
+
|
2192
|
+
# Shortcut for OriginalAlignment.new2: all arguments are passed through
# and the result of that call is returned.
# Please refer to the documentation of OriginalAlignment.new2.
def self.new2(*arg)
  OriginalAlignment.new2(*arg)
end
|
2197
|
+
|
2198
|
+
# Shortcut for OriginalAlignment.readfiles: all file arguments are
# passed through and the result of that call is returned.
# Please refer to the documentation of OriginalAlignment.readfiles.
def self.readfiles(*files)
  OriginalAlignment.readfiles(*files)
end
|
2203
|
+
|
2204
|
+
#---
|
2205
|
+
# Service classes for multiple alignment applications
|
2206
|
+
#+++
|
2207
|
+
#---
|
2208
|
+
# Templates of alignment application factory
|
2209
|
+
#+++
|
2210
|
+
|
2211
|
+
# Namespace for templates for alignment application factory
|
2212
|
+
module FactoryTemplate
|
2213
|
+
|
2214
|
+
# Template class for alignment application factory.
# The program acts:
# input: stdin or file, format = fasta format
# output: stdout (parser should be specified by DEFAULT_PARSER)
#
# This class itself does not define _query_string or _query_local;
# they are expected to come from the Wrap* mix-in modules (or from a
# subclass).
class Simple

  # Creates a new alignment factory.
  # program:: program name (defaults to the class's DEFAULT_PROGRAM)
  # options:: array of command-line options
  def initialize(program = self.class::DEFAULT_PROGRAM, options = [])
    @program = program
    @options = options
    @command = @output = @report = @exit_status = @data_stdout = nil
  end

  # program name
  attr_accessor :program

  # options
  attr_accessor :options

  # Last command-line string. Returns nil or an array of String.
  # Note that filenames described in the command-line may already
  # be removed because these files may be temporary files.
  attr_reader :command

  # Last raw result of the program.
  # Return a string (or nil).
  attr_reader :output

  # Last result object performed by the factory.
  attr_reader :report

  # Last exit status
  attr_reader :exit_status

  # Last output to the stdout.
  attr_accessor :data_stdout

  # Clear the internal data and status, except program and options.
  def reset
    @command = @output = @report = @exit_status = @data_stdout = nil
  end

  # Executes the program.
  # If +seqs+ is not nil, perform alignment for seqs.
  # If +seqs+ is nil, simply executes the program.
  #
  # Compatibility note: When seqs is nil,
  # returns true if the program exits normally, and
  # returns false if the program exits abnormally.
  def query(seqs)
    return query_alignment(seqs) if seqs

    exec_local(@options)
    @exit_status.exitstatus == 0
  end

  # Performs alignment for seqs.
  # +seqs+ should be Bio::Alignment or Array of sequences or nil.
  # Anything that does not respond to +output_fasta+ is first wrapped
  # with Bio::Alignment.new. The program is fed fasta text with a line
  # width of 70.
  def query_alignment(seqs)
    seqs = Bio::Alignment.new(seqs) unless seqs.respond_to?(:output_fasta)
    query_string(seqs.output_fasta(:width => 70))
  end

  # alias of query_alignment.
  #
  # Compatibility Note: query_align will be renamed to query_alignment.
  def query_align(seqs)
    #warn 'query_align is renamed to query_alignment.'
    query_alignment(seqs)
  end

  # Performs alignment for +str+.
  # The +str+ should be a string that can be recognized by the program.
  # Returns the report object stored by the query.
  def query_string(str)
    _query_string(str, @options)
    @report
  end

  # Performs alignment of sequences in the file named +filename_in+.
  # Returns the report object stored by the query.
  def query_by_filename(filename_in)
    _query_local(filename_in, @options)
    @report
  end

  private
  # Executes a program in the local machine.
  # Records the command line, the program's stdout and the exit
  # status (taken from <tt>$?</tt>) in instance variables.
  def exec_local(opt, data_stdin = nil)
    @exit_status = nil
    @command = [ @program, *opt ]
    #STDERR.print "DEBUG: ", @command.join(" "), "\n"
    @data_stdout = Bio::Command.query_command(@command, data_stdin)
    @exit_status = $?
  end

  # prepare temporary file
  # When +str+ is given it is written to the file, which is named
  # with the 'alignment_i' prefix; otherwise an empty file with the
  # 'alignment_o' prefix is created. The file is closed (not removed)
  # and the Tempfile object is returned.
  def _prepare_tempfile(str = nil)
    basename = str ? 'alignment_i' : 'alignment_o'
    tempfile = Tempfile.open(basename)
    tempfile.print str if str
    tempfile.close(false)
    tempfile
  end

  # generates options specifying input/output filename.
  # nil for filename means stdin or stdout.
  # +options+ must not contain options that specify filenames.
  # returns an array of string.
  def _generate_options(infile, outfile, options)
    input_opts = infile ? _option_input_file(infile) : _option_input_stdin
    output_opts = outfile ? _option_output_file(outfile) : _option_output_stdout
    options + input_opts + output_opts
  end

  # generates options specifying input filename.
  # returns an array of string
  def _option_input_file(fn)
    [ fn ]
  end

  # generates options specifying output filename.
  # This template always writes to stdout, so it raises.
  def _option_output_file(fn)
    raise 'can not specify output file: always stdout'
  end

  # generates options specifying that input is taken from stdin.
  # returns an array of string
  def _option_input_stdin
    []
  end

  # generates options specifying output to stdout.
  # returns an array of string
  def _option_output_stdout
    []
  end
end #class Simple
|
2362
|
+
|
2363
|
+
# Mix-in module for factories whose input is given as a string
# passed on to _query_local as its third argument.
module WrapInputStdin
  private
  # Performs alignment for +str+.
  # The +str+ should be a string that can be recognized by the program.
  # No input filename is used: +str+ is forwarded as the third
  # argument of _query_local, whose result is returned.
  def _query_string(str, opt)
    input_filename = nil
    _query_local(input_filename, opt, str)
  end
end #module WrapInputStdin
|
2372
|
+
|
2373
|
+
# Mix-in module for factories whose input must be a file.
module WrapInputTempfile
  private
  # Performs alignment for +str+.
  # The +str+ should be a string that can be recognized by the program.
  # +str+ is written to a temporary file whose path is passed to
  # _query_local; the temporary file is closed with
  # <tt>close(true)</tt> afterwards, even when an exception is raised.
  # Returns the result of _query_local.
  def _query_string(str, opt)
    tf_in = _prepare_tempfile(str)
    _query_local(tf_in.path, opt, nil)
  ensure
    tf_in.close(true) if tf_in
  end
end #module WrapInputTempfile
|
2388
|
+
|
2389
|
+
# Mix-in module for factories whose program writes its result to stdout.
module WrapOutputStdout
  private
  # Performs alignment by specified filenames
  # Runs the program with the options built by _generate_options
  # (no output filename), stores the captured stdout as the raw output
  # and parses it with the class's DEFAULT_PARSER.
  # Returns the report object.
  def _query_local(fn_in, opt, data_stdin = nil)
    exec_local(_generate_options(fn_in, nil, opt), data_stdin)
    @output = @data_stdout
    @report = self.class::DEFAULT_PARSER.new(@output)
  end
end #module WrapOutputStdout
|
2401
|
+
|
2402
|
+
# Mix-in module for factories whose program writes its result to a file.
module WrapOutputTempfile
  private
  # Performs alignment
  # A temporary file is prepared and its path is given to
  # _generate_options as the output filename. After the program has
  # run, the file is reopened and its content becomes the raw output.
  # The temporary file is closed with <tt>close(true)</tt> in every
  # case. The output is then parsed with the class's DEFAULT_PARSER
  # and the report object is returned.
  def _query_local(fn_in, opt, data_stdin = nil)
    tf_out = nil
    begin
      tf_out = _prepare_tempfile
      exec_local(_generate_options(fn_in, tf_out.path, opt), data_stdin)
      tf_out.open
      @output = tf_out.read
    ensure
      tf_out.close(true) if tf_out
    end
    @report = self.class::DEFAULT_PARSER.new(@output)
  end
end #module WrapOutputTempfile
|
2420
|
+
|
2421
|
+
# Template class for alignment application factory.
# The program needs:
# input: file (cannot accept stdin), format = fasta format
# output: stdout (parser should be specified by DEFAULT_PARSER)
#
# Built from Simple plus the WrapInputTempfile and WrapOutputStdout
# mix-ins.
class FileInStdoutOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputTempfile
  include Bio::Alignment::FactoryTemplate::WrapOutputStdout

  private
  # Input from stdin is not supported by this template:
  # always raises RuntimeError.
  def _option_input_stdin
    raise RuntimeError, 'input is always a file'
  end
end #class FileInStdoutOut
|
2436
|
+
|
2437
|
+
# Template class for alignment application factory.
# The program needs:
# input: stdin or file, format = fasta format
# output: file (parser should be specified by DEFAULT_PARSER)
#
# Built from Simple plus the WrapInputStdin and WrapOutputTempfile
# mix-ins.
class StdinInFileOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputStdin
  include Bio::Alignment::FactoryTemplate::WrapOutputTempfile

  private
  # Output to stdout is not supported by this template:
  # always raises RuntimeError.
  def _option_output_stdout
    raise RuntimeError, 'output is always a file'
  end
end #class StdinInFileOut
|
2452
|
+
|
2453
|
+
# Template class for alignment application factory.
# The program needs:
# input: file (cannot accept stdin), format = fasta format
# output: file (parser should be specified by DEFAULT_PARSER)
#
# Built from Simple plus the WrapInputTempfile and WrapOutputTempfile
# mix-ins.
class FileInFileOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputTempfile
  include Bio::Alignment::FactoryTemplate::WrapOutputTempfile

  private
  # Input from stdin is not supported by this template:
  # always raises RuntimeError.
  def _option_input_stdin
    raise RuntimeError, 'input is always a file'
  end

  # Output to stdout is not supported by this template:
  # always raises RuntimeError.
  def _option_output_stdout
    raise RuntimeError, 'output is always a file'
  end
end #class FileInFileOut
|
2474
|
+
|
2475
|
+
# Template class for alignment application factory.
# The program needs:
# input: file (cannot accept stdin), format = fasta format
# output: file (parser should be specified by DEFAULT_PARSER)
# Tree (*.dnd) output is also supported.
#
# Subclasses must override _option_output_dndfile so that it returns
# the options (an array of strings) telling the program where to
# write the tree file.
class FileInFileOutWithTree < FileInFileOut

  # alignment guide tree generated by the program (*.dnd file)
  attr_reader :output_dnd

  # Clears the stored tree output in addition to the state cleared by
  # the superclass.
  def reset
    @output_dnd = nil
    super
  end

  private
  # Performs alignment
  # A temporary file for the tree is prepared and the options returned
  # by _option_output_dndfile for its path are appended to +opt+ before
  # the superclass implementation is run. Afterwards the tree file is
  # reopened and read into @output_dnd. The temporary file is closed
  # with <tt>close(true)</tt> in every case.
  # Returns the result of the superclass implementation.
  def _query_local(fn_in, opt, data_stdin = nil)
    begin
      tf_dnd = _prepare_tempfile()
      opt = opt + _option_output_dndfile(tf_dnd.path)
      ret = super(fn_in, opt, data_stdin)
      tf_dnd.open
      @output_dnd = tf_dnd.read
    ensure
      tf_dnd.close(true) if tf_dnd
    end
    ret
  end

  # generates options specifying output tree file (*.dnd).
  # fn_dnd:: path of the tree file
  # returns an array of string
  #
  # Abstract: this base implementation always raises
  # NotImplementedError. The parameter matches the call made in
  # _query_local, which passes the tree file path; it is optional so
  # that an argument-less call keeps working as before.
  def _option_output_dndfile(fn_dnd = nil)
    raise NotImplementedError
  end
end #class FileInFileOutWithTree
|
2511
|
+
|
2512
|
+
end #module FactoryTemplate
|
2513
|
+
|
2514
|
+
|
2515
|
+
end #module Alignment
|
2516
|
+
|
2517
|
+
end #module Bio
|
2518
|
+
|