bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/doc/Tutorial.rd
CHANGED
|
@@ -2,16 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
See the document in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)) - for a potentially more up-to-date edition. This one was updated:
|
|
4
4
|
|
|
5
|
-
$Id: Tutorial.rd,v 1.
|
|
5
|
+
$Id: Tutorial.rd,v 1.13 2007/07/09 12:28:07 pjotr Exp $
|
|
6
6
|
|
|
7
7
|
Translated into English: Naohisa Goto <ng@bioruby.org>
|
|
8
8
|
|
|
9
9
|
Editor: PjotrPrins <p@bioruby.org>
|
|
10
10
|
|
|
11
|
-
Copyright (C) 2001-2003 KATAYAMA Toshiaki <k@bioruby.org>, 2005-
|
|
12
|
-
others
|
|
13
|
-
|
|
14
|
-
NOTE: This page is a work in progress at this point
|
|
11
|
+
Copyright (C) 2001-2003 KATAYAMA Toshiaki <k@bioruby.org>, 2005-2007 Pjotr Prins, Naohisa Goto and others
|
|
15
12
|
|
|
16
13
|
IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
|
|
17
14
|
repository. Please edit the file there otherwise changes may get
|
|
@@ -38,7 +35,7 @@ version it has with the
|
|
|
38
35
|
|
|
39
36
|
command. Showing something like:
|
|
40
37
|
|
|
41
|
-
ruby 1.8.
|
|
38
|
+
ruby 1.8.5 (2006-08-25) [powerpc-linux]
|
|
42
39
|
|
|
43
40
|
|
|
44
41
|
== Trying Bioruby
|
|
@@ -96,6 +93,9 @@ defined in codontable.rb).
|
|
|
96
93
|
|
|
97
94
|
puts seq.complement.translate # translation of complemental strand
|
|
98
95
|
|
|
96
|
+
counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
|
|
97
|
+
p randomseq = Bio::Sequence::NA.randomize(counts) # reshuffle sequence with same freq.
|
|
98
|
+
|
|
99
99
|
The p, print and puts methods are standard Ruby ways of outputting to
|
|
100
100
|
the screen. If you want to know more about standard Ruby commands you
|
|
101
101
|
can use the 'ri' command on the command line (or the help command in
|
|
@@ -462,6 +462,40 @@ Array and BioPerl's Bio::SimpleAlign. A very simple example is:
|
|
|
462
462
|
factory = Bio::ClustalW.new
|
|
463
463
|
a2 = a.do_align(factory)
|
|
464
464
|
|
|
465
|
+
== Restriction Enzymes (Bio::RE)
|
|
466
|
+
|
|
467
|
+
BioRuby has extensive support for restriction enzymes (REs). It contains a full
|
|
468
|
+
library of commonly used REs (from REBASE) which can be used to cut single
|
|
469
|
+
stranded RNA or double stranded DNA into fragments. To list all enzymes:
|
|
470
|
+
|
|
471
|
+
rebase = Bio::RestrictionEnzyme.rebase
|
|
472
|
+
rebase.each do |enzyme_name, info|
|
|
473
|
+
p enzyme_name
|
|
474
|
+
end
|
|
475
|
+
|
|
476
|
+
To cut a sequence with an enzyme, follow up with:
|
|
477
|
+
|
|
478
|
+
res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0}, {:view_ranges => true})
|
|
479
|
+
if res.kind_of? Symbol #error
|
|
480
|
+
err = Err.find_by_code(res.to_s)
|
|
481
|
+
unless err
|
|
482
|
+
err = Err.new(:code => res.to_s)
|
|
483
|
+
end
|
|
484
|
+
end
|
|
485
|
+
res.each do |frag|
|
|
486
|
+
em = EnzymeMatch.new
|
|
487
|
+
|
|
488
|
+
em.p_left = frag.p_left
|
|
489
|
+
em.p_right = frag.p_right
|
|
490
|
+
em.c_left = frag.c_left
|
|
491
|
+
em.c_right = frag.c_right
|
|
492
|
+
|
|
493
|
+
em.err = nil
|
|
494
|
+
em.enzyme = ar_enz
|
|
495
|
+
em.sequence = ar_seq
|
|
496
|
+
p em
|
|
497
|
+
end
|
|
498
|
+
|
|
465
499
|
|
|
466
500
|
== Sequence homology search by using the FASTA program (Bio::Fasta)
|
|
467
501
|
|
|
@@ -1123,14 +1157,17 @@ Please refer to KEGG_API.rd.ja (TRANSLATOR'S NOTE: English version: ((<URL:http:
|
|
|
1123
1157
|
|
|
1124
1158
|
* ((<URL:http://www.genome.jp/kegg/soap/>))
|
|
1125
1159
|
|
|
1160
|
+
== Comparing BioProjects
|
|
1161
|
+
|
|
1162
|
+
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
|
1163
|
+
|
|
1126
1164
|
== Using BioRuby with R
|
|
1127
1165
|
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
this point you'll have to create some command line interface.
|
|
1166
|
+
On using Ruby with R, Pjotr wrote a section on SciRuby. See ((<URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang>))
|
|
1167
|
+
|
|
1168
|
+
== Using BioPerl or BioPython from Ruby
|
|
1132
1169
|
|
|
1133
|
-
|
|
1170
|
+
At the moment there is no easy way of accessing BioPerl from Ruby. The best way, perhaps, is to create a Perl server that gets accessed through XML/RPC or SOAP.
|
|
1134
1171
|
|
|
1135
1172
|
== Installing required external library
|
|
1136
1173
|
|
data/lib/bio.rb
CHANGED
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
#
|
|
4
4
|
# Copyright:: Copyright (C) 2001-2006
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
-
# License:: Ruby
|
|
6
|
+
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id: bio.rb,v 1.
|
|
8
|
+
# $Id: bio.rb,v 1.86 2007/07/16 12:26:28 ngoto Exp $
|
|
9
9
|
#
|
|
10
10
|
|
|
11
11
|
module Bio
|
|
12
12
|
|
|
13
|
-
BIORUBY_VERSION = [1,
|
|
13
|
+
BIORUBY_VERSION = [1, 1, 0].extend(Comparable)
|
|
14
14
|
|
|
15
15
|
### Basic data types
|
|
16
16
|
|
|
@@ -42,7 +42,12 @@ module Bio
|
|
|
42
42
|
|
|
43
43
|
autoload :Alignment, 'bio/alignment'
|
|
44
44
|
|
|
45
|
+
## Tree
|
|
46
|
+
autoload :Tree, 'bio/tree'
|
|
45
47
|
|
|
48
|
+
## Map
|
|
49
|
+
autoload :Map, 'bio/map'
|
|
50
|
+
|
|
46
51
|
### Constants
|
|
47
52
|
|
|
48
53
|
autoload :NucleicAcid, 'bio/data/na'
|
|
@@ -83,14 +88,15 @@ module Bio
|
|
|
83
88
|
autoload :GENES, 'bio/db/kegg/genes'
|
|
84
89
|
autoload :ENZYME, 'bio/db/kegg/enzyme'
|
|
85
90
|
autoload :COMPOUND, 'bio/db/kegg/compound'
|
|
91
|
+
autoload :DRUG, 'bio/db/kegg/drug'
|
|
86
92
|
autoload :GLYCAN, 'bio/db/kegg/glycan'
|
|
87
93
|
autoload :REACTION, 'bio/db/kegg/reaction'
|
|
88
94
|
autoload :BRITE, 'bio/db/kegg/brite'
|
|
89
95
|
autoload :CELL, 'bio/db/kegg/cell'
|
|
90
96
|
autoload :EXPRESSION, 'bio/db/kegg/expression'
|
|
91
|
-
autoload :
|
|
92
|
-
autoload :KO, 'bio/db/kegg/ko'
|
|
97
|
+
autoload :ORTHOLOGY, 'bio/db/kegg/orthology'
|
|
93
98
|
autoload :KGML, 'bio/db/kegg/kgml'
|
|
99
|
+
autoload :Taxonomy, 'bio/db/kegg/taxonomy'
|
|
94
100
|
end
|
|
95
101
|
|
|
96
102
|
## other formats
|
|
@@ -110,13 +116,19 @@ module Bio
|
|
|
110
116
|
autoload :GO, 'bio/db/go'
|
|
111
117
|
autoload :PDB, 'bio/db/pdb'
|
|
112
118
|
autoload :NBRF, 'bio/db/nbrf'
|
|
119
|
+
autoload :REBASE, 'bio/db/rebase'
|
|
120
|
+
autoload :SOFT, 'bio/db/soft'
|
|
121
|
+
autoload :Lasergene, 'bio/db/lasergene'
|
|
113
122
|
|
|
123
|
+
autoload :Newick, 'bio/db/newick'
|
|
124
|
+
autoload :Nexus, 'bio/db/nexus'
|
|
114
125
|
|
|
115
126
|
### IO interface modules
|
|
116
127
|
|
|
117
128
|
autoload :Registry, 'bio/io/registry'
|
|
118
129
|
autoload :Fetch, 'bio/io/fetch'
|
|
119
130
|
autoload :SQL, 'bio/io/sql'
|
|
131
|
+
autoload :SOAPWSDL, 'bio/io/soapwsdl'
|
|
120
132
|
autoload :FlatFile, 'bio/io/flatfile'
|
|
121
133
|
autoload :FlatFileIndex, 'bio/io/flatfile/index' # change to FlatFile::Index ?
|
|
122
134
|
## below are described in bio/io/flatfile/index.rb
|
|
@@ -131,6 +143,8 @@ module Bio
|
|
|
131
143
|
autoload :DAS, 'bio/io/das'
|
|
132
144
|
autoload :DBGET, 'bio/io/dbget'
|
|
133
145
|
|
|
146
|
+
autoload :Ensembl, 'bio/io/ensembl'
|
|
147
|
+
|
|
134
148
|
## below are described in bio/appl/blast.rb
|
|
135
149
|
#class Blast
|
|
136
150
|
# autoload :Fastacmd, 'bio/io/fastacmd'
|
|
@@ -149,7 +163,13 @@ module Bio
|
|
|
149
163
|
autoload :HiGet, 'bio/io/higet'
|
|
150
164
|
end
|
|
151
165
|
|
|
152
|
-
|
|
166
|
+
class EBI
|
|
167
|
+
autoload :SOAP, 'bio/io/ebisoap'
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
class NCBI
|
|
171
|
+
autoload :SOAP, 'bio/io/ncbisoap'
|
|
172
|
+
end
|
|
153
173
|
|
|
154
174
|
|
|
155
175
|
### Applications
|
|
@@ -206,6 +226,10 @@ module Bio
|
|
|
206
226
|
# autoload :Report, 'bio/appl/mafft/report'
|
|
207
227
|
#end
|
|
208
228
|
|
|
229
|
+
autoload :Tcoffee, 'bio/appl/tcoffee'
|
|
230
|
+
autoload :Muscle, 'bio/appl/muscle'
|
|
231
|
+
autoload :Probcons, 'bio/appl/probcons'
|
|
232
|
+
|
|
209
233
|
autoload :Sim4, 'bio/appl/sim4'
|
|
210
234
|
## below are described in bio/appl/sim4.rb
|
|
211
235
|
#class Sim4
|
|
@@ -215,11 +239,40 @@ module Bio
|
|
|
215
239
|
autoload :Spidey, 'bio/appl/spidey/report'
|
|
216
240
|
autoload :Blat, 'bio/appl/blat/report'
|
|
217
241
|
|
|
242
|
+
module GCG
|
|
243
|
+
autoload :Msf, 'bio/appl/gcg/msf'
|
|
244
|
+
autoload :Seq, 'bio/appl/gcg/seq'
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
module Phylip
|
|
248
|
+
autoload :PhylipFormat, 'bio/appl/phylip/alignment'
|
|
249
|
+
autoload :DistanceMatrix, 'bio/appl/phylip/distance_matrix'
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
autoload :Iprscan, 'bio/appl/iprscan/report'
|
|
218
253
|
|
|
219
254
|
### Utilities
|
|
220
255
|
|
|
221
256
|
autoload :SiRNA, 'bio/util/sirna'
|
|
222
257
|
autoload :ColorScheme, 'bio/util/color_scheme'
|
|
258
|
+
autoload :ContingencyTable, 'bio/util/contingency_table'
|
|
259
|
+
autoload :RestrictionEnzyme, 'bio/util/restriction_enzyme'
|
|
260
|
+
|
|
261
|
+
### Service libraries
|
|
262
|
+
autoload :Command, 'bio/command'
|
|
263
|
+
|
|
264
|
+
### Provide BioRuby shell 'command' also as 'Bio.command' (like ChemRuby)
|
|
265
|
+
|
|
266
|
+
def self.method_missing(*args)
|
|
267
|
+
require 'bio/shell'
|
|
268
|
+
extend Bio::Shell
|
|
269
|
+
public_class_method(*Bio::Shell.private_instance_methods)
|
|
270
|
+
if Bio.respond_to?(args.first)
|
|
271
|
+
Bio.send(*args)
|
|
272
|
+
else
|
|
273
|
+
raise NameError
|
|
274
|
+
end
|
|
275
|
+
end
|
|
223
276
|
|
|
224
277
|
end
|
|
225
278
|
|
data/lib/bio/alignment.rb
CHANGED
|
@@ -1,28 +1,12 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/alignment.rb - multiple alignment of sequences
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2003, 2005
|
|
5
|
-
# GOTO Naohisa <
|
|
4
|
+
# Copyright:: Copyright (C) 2003, 2005, 2006
|
|
5
|
+
# GOTO Naohisa <ng@bioruby.org>
|
|
6
6
|
#
|
|
7
|
-
# License::
|
|
7
|
+
# License:: The Ruby License
|
|
8
8
|
#
|
|
9
|
-
# $Id: alignment.rb,v 1.
|
|
10
|
-
#
|
|
11
|
-
#--
|
|
12
|
-
# This library is free software; you can redistribute it and/or
|
|
13
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
-
# License as published by the Free Software Foundation; either
|
|
15
|
-
# version 2 of the License, or (at your option) any later version.
|
|
16
|
-
#
|
|
17
|
-
# This library is distributed in the hope that it will be useful,
|
|
18
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20
|
-
# Lesser General Public License for more details.
|
|
21
|
-
#
|
|
22
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
23
|
-
# License along with this library; if not, write to the Free Software
|
|
24
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
-
#++
|
|
9
|
+
# $Id: alignment.rb,v 1.23 2007/07/16 12:21:39 ngoto Exp $
|
|
26
10
|
#
|
|
27
11
|
# = About Bio::Alignment
|
|
28
12
|
#
|
|
@@ -37,52 +21,59 @@
|
|
|
37
21
|
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
|
|
38
22
|
#
|
|
39
23
|
|
|
24
|
+
require 'tempfile'
|
|
25
|
+
require 'bio/command'
|
|
40
26
|
require 'bio/sequence'
|
|
41
27
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
= About Bio::Alignment
|
|
47
|
-
|
|
48
|
-
Bio::Alignment is a namespace of classes/modules for multiple sequence
|
|
49
|
-
alignment.
|
|
50
|
-
|
|
51
|
-
= Multiple alignment container classes
|
|
52
|
-
|
|
53
|
-
== Bio::Alignment::OriginalAlignment
|
|
54
|
-
|
|
55
|
-
== Bio::Alignment::SequenceArray
|
|
56
|
-
|
|
57
|
-
== Bio::Alignment::SequenceHash
|
|
58
|
-
|
|
59
|
-
= Bio::Alignment::Site
|
|
60
|
-
|
|
61
|
-
= Modules
|
|
62
|
-
|
|
63
|
-
== Bio::Alignment::EnumerableExtension
|
|
64
|
-
|
|
65
|
-
Mix-in for classes included Enumerable.
|
|
66
|
-
|
|
67
|
-
== Bio::Alignment::ArrayExtension
|
|
68
|
-
|
|
69
|
-
Mix-in for Array or Array-like classes.
|
|
28
|
+
#---
|
|
29
|
+
# (depends on autoload)
|
|
30
|
+
#require 'bio/appl/gcg/seq'
|
|
31
|
+
#+++
|
|
70
32
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
Mix-in for Hash or Hash-like classes.
|
|
74
|
-
|
|
75
|
-
== Bio::Alignment::SiteMethods
|
|
76
|
-
|
|
77
|
-
== Bio::Alignment::PropertyMethods
|
|
78
|
-
|
|
79
|
-
= Bio::Alignment::GAP
|
|
80
|
-
|
|
81
|
-
= Compatibility from older BioRuby
|
|
33
|
+
module Bio
|
|
82
34
|
|
|
83
|
-
|
|
35
|
+
#
|
|
36
|
+
# = About Bio::Alignment
|
|
37
|
+
#
|
|
38
|
+
# Bio::Alignment is a namespace of classes/modules for multiple sequence
|
|
39
|
+
# alignment.
|
|
40
|
+
#
|
|
41
|
+
# = Multiple alignment container classes
|
|
42
|
+
#
|
|
43
|
+
# == Bio::Alignment::OriginalAlignment
|
|
44
|
+
#
|
|
45
|
+
# == Bio::Alignment::SequenceArray
|
|
46
|
+
#
|
|
47
|
+
# == Bio::Alignment::SequenceHash
|
|
48
|
+
#
|
|
49
|
+
# = Bio::Alignment::Site
|
|
50
|
+
#
|
|
51
|
+
# = Modules
|
|
52
|
+
#
|
|
53
|
+
# == Bio::Alignment::EnumerableExtension
|
|
54
|
+
#
|
|
55
|
+
# Mix-in for classes that include Enumerable.
|
|
56
|
+
#
|
|
57
|
+
# == Bio::Alignment::ArrayExtension
|
|
58
|
+
#
|
|
59
|
+
# Mix-in for Array or Array-like classes.
|
|
60
|
+
#
|
|
61
|
+
# == Bio::Alignment::HashExtension
|
|
62
|
+
#
|
|
63
|
+
# Mix-in for Hash or Hash-like classes.
|
|
64
|
+
#
|
|
65
|
+
# == Bio::Alignment::SiteMethods
|
|
66
|
+
#
|
|
67
|
+
# == Bio::Alignment::PropertyMethods
|
|
68
|
+
#
|
|
69
|
+
# = Bio::Alignment::GAP
|
|
70
|
+
#
|
|
71
|
+
# = Compatibility from older BioRuby
|
|
72
|
+
#
|
|
84
73
|
module Alignment
|
|
85
74
|
|
|
75
|
+
autoload :MultiFastaFormat, 'bio/appl/mafft/report'
|
|
76
|
+
|
|
86
77
|
# Bio::Alignment::PropertyMethods is a set of methods to treat
|
|
87
78
|
# the gap character and so on.
|
|
88
79
|
module PropertyMethods
|
|
@@ -640,10 +631,14 @@ Mix-in for Hash or Hash-like classes.
|
|
|
640
631
|
amino = true
|
|
641
632
|
elsif seqclass == Bio::Sequence::NA then
|
|
642
633
|
amino = false
|
|
643
|
-
elsif self.find { |x| /[EFILPQ]/i =~ x } then
|
|
644
|
-
amino = true
|
|
645
634
|
else
|
|
646
635
|
amino = nil
|
|
636
|
+
self.each_seq do |x|
|
|
637
|
+
if /[EFILPQ]/i =~ x
|
|
638
|
+
amino = true
|
|
639
|
+
break
|
|
640
|
+
end
|
|
641
|
+
end
|
|
647
642
|
end
|
|
648
643
|
end
|
|
649
644
|
if amino then
|
|
@@ -873,16 +868,31 @@ Mix-in for Hash or Hash-like classes.
|
|
|
873
868
|
end
|
|
874
869
|
end #module EnumerableExtension
|
|
875
870
|
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
871
|
+
module Output
|
|
872
|
+
def output(format, *arg)
|
|
873
|
+
case format
|
|
874
|
+
when :clustal
|
|
875
|
+
output_clustal(*arg)
|
|
876
|
+
when :fasta
|
|
877
|
+
output_fasta(*arg)
|
|
878
|
+
when :phylip
|
|
879
|
+
output_phylip(*arg)
|
|
880
|
+
when :phylipnon
|
|
881
|
+
output_phylipnon(*arg)
|
|
882
|
+
when :msf
|
|
883
|
+
output_msf(*arg)
|
|
884
|
+
when :molphy
|
|
885
|
+
output_molphy(*arg)
|
|
886
|
+
else
|
|
887
|
+
raise "Unknown format: #{format.inspect}"
|
|
888
|
+
end
|
|
889
|
+
end
|
|
890
|
+
|
|
891
|
+
# Check whether there are same names for ClustalW format.
|
|
882
892
|
#
|
|
883
893
|
# array:: names of the sequences (array of string)
|
|
884
894
|
# len:: length to check (default:30)
|
|
885
|
-
def
|
|
895
|
+
def __clustal_have_same_name?(array, len = 30)
|
|
886
896
|
na30 = array.collect do |k|
|
|
887
897
|
k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/\:\;\,\(\)/, '_').to_s
|
|
888
898
|
end
|
|
@@ -909,15 +919,16 @@ Mix-in for Hash or Hash-like classes.
|
|
|
909
919
|
false
|
|
910
920
|
end
|
|
911
921
|
end
|
|
912
|
-
private :
|
|
922
|
+
private :__clustal_have_same_name?
|
|
913
923
|
|
|
914
|
-
# Changes sequence names if there are conflicted names
|
|
924
|
+
# Changes sequence names if there are conflicted names
|
|
925
|
+
# for ClustalW format.
|
|
915
926
|
#
|
|
916
927
|
# array:: names of the sequences (array of string)
|
|
917
928
|
# len:: length to check (default:30)
|
|
918
|
-
def
|
|
929
|
+
def __clustal_avoid_same_name(array, len = 30)
|
|
919
930
|
na = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
|
|
920
|
-
if dupidx =
|
|
931
|
+
if dupidx = __clustal_have_same_name?(na, len)
|
|
921
932
|
procs = [
|
|
922
933
|
Proc.new { |s, i|
|
|
923
934
|
s[0, len].to_s.gsub(/\s/, '_') + s[len..-1].to_s
|
|
@@ -931,7 +942,7 @@ Mix-in for Hash or Hash-like classes.
|
|
|
931
942
|
s = array[i]
|
|
932
943
|
na[i] = pr.call(s.to_s, i)
|
|
933
944
|
end
|
|
934
|
-
dupidx =
|
|
945
|
+
dupidx = __clustal_have_same_name?(na, len)
|
|
935
946
|
break unless dupidx
|
|
936
947
|
end
|
|
937
948
|
if dupidx then
|
|
@@ -942,13 +953,13 @@ Mix-in for Hash or Hash-like classes.
|
|
|
942
953
|
end
|
|
943
954
|
na
|
|
944
955
|
end
|
|
945
|
-
private :
|
|
956
|
+
private :__clustal_avoid_same_name
|
|
946
957
|
|
|
947
958
|
# Generates ClustalW-formatted text
|
|
948
959
|
# seqs:: sequences (must be an alignment object)
|
|
949
960
|
# names:: names of the sequences
|
|
950
961
|
# options:: options
|
|
951
|
-
def
|
|
962
|
+
def __clustal_formatter(seqs, names, options = {})
|
|
952
963
|
#(original)
|
|
953
964
|
aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
|
|
954
965
|
len = seqs.seq_length
|
|
@@ -963,7 +974,7 @@ Mix-in for Hash or Hash-like classes.
|
|
|
963
974
|
sn.collect! { |x| x.split(/\s/)[0].to_s }
|
|
964
975
|
end
|
|
965
976
|
if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
|
|
966
|
-
sn =
|
|
977
|
+
sn = __clustal_avoid_same_name(sn)
|
|
967
978
|
end
|
|
968
979
|
|
|
969
980
|
if sn.find { |x| x.length > 10 } then
|
|
@@ -988,8 +999,9 @@ Mix-in for Hash or Hash-like classes.
|
|
|
988
999
|
end
|
|
989
1000
|
mline = (options[:match_line] or seqs.match_line(mopt))
|
|
990
1001
|
|
|
991
|
-
aseqs = seqs.
|
|
992
|
-
|
|
1002
|
+
aseqs = Array.new(seqs.number_of_sequences).clear
|
|
1003
|
+
seqs.each_seq do |s|
|
|
1004
|
+
aseqs << s.to_s.gsub(seqs.gap_regexp, gchar)
|
|
993
1005
|
end
|
|
994
1006
|
case options[:case].to_s
|
|
995
1007
|
when /lower/i
|
|
@@ -1023,9 +1035,295 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1023
1035
|
end
|
|
1024
1036
|
aln.join('')
|
|
1025
1037
|
end
|
|
1026
|
-
private :
|
|
1027
|
-
|
|
1038
|
+
private :__clustal_formatter
|
|
1039
|
+
|
|
1040
|
+
# Generates ClustalW-formatted text
|
|
1041
|
+
# seqs:: sequences (must be an alignment object)
|
|
1042
|
+
# names:: names of the sequences
|
|
1043
|
+
# options:: options
|
|
1044
|
+
def output_clustal(options = {})
|
|
1045
|
+
__clustal_formatter(self, self.sequence_names, options)
|
|
1046
|
+
end
|
|
1047
|
+
|
|
1048
|
+
# to_clustal is deprecated. Instead, please use output_clustal.
|
|
1049
|
+
#---
|
|
1050
|
+
#alias to_clustal output_clustal
|
|
1051
|
+
#+++
|
|
1052
|
+
def to_clustal(*arg)
|
|
1053
|
+
warn "to_clustal is deprecated. Please use output_clustal."
|
|
1054
|
+
output_clustal(*arg)
|
|
1055
|
+
end
|
|
1056
|
+
|
|
1057
|
+
# Generates fasta format text and returns a string.
|
|
1058
|
+
def output_fasta(options={})
|
|
1059
|
+
#(original)
|
|
1060
|
+
width = (options[:width] or 70)
|
|
1061
|
+
if options[:avoid_same_name] then
|
|
1062
|
+
na = __clustal_avoid_same_name(self.sequence_names, 30)
|
|
1063
|
+
else
|
|
1064
|
+
na = self.sequence_names.collect do |k|
|
|
1065
|
+
k.to_s.gsub(/[\r\n\x00]/, ' ')
|
|
1066
|
+
end
|
|
1067
|
+
end
|
|
1068
|
+
if width and width > 0 then
|
|
1069
|
+
w_reg = Regexp.new(".{1,#{width}}")
|
|
1070
|
+
self.collect do |s|
|
|
1071
|
+
">#{na.shift}\n" + s.to_s.gsub(w_reg, "\\0\n")
|
|
1072
|
+
end.join('')
|
|
1073
|
+
else
|
|
1074
|
+
self.collect do |s|
|
|
1075
|
+
">#{na.shift}\n" + s.to_s + "\n"
|
|
1076
|
+
end.join('')
|
|
1077
|
+
end
|
|
1078
|
+
end
|
|
1079
|
+
|
|
1080
|
+
# generates phylip interleaved alignment format as a string
|
|
1081
|
+
def output_phylip(options = {})
|
|
1082
|
+
aln, aseqs, lines = __output_phylip_common(options)
|
|
1083
|
+
lines.times do
|
|
1084
|
+
aseqs.each { |a| aln << a.shift }
|
|
1085
|
+
aln << "\n"
|
|
1086
|
+
end
|
|
1087
|
+
aln.pop if aln[-1] == "\n"
|
|
1088
|
+
aln.join('')
|
|
1089
|
+
end
|
|
1028
1090
|
|
|
1091
|
+
# generates Phylip3.2 (old) non-interleaved format as a string
|
|
1092
|
+
def output_phylipnon(options = {})
|
|
1093
|
+
aln, aseqs, lines = __output_phylip_common(options)
|
|
1094
|
+
aln.first + aseqs.join('')
|
|
1095
|
+
end
|
|
1096
|
+
|
|
1097
|
+
# common routine for interleaved/non-interleaved phylip format
|
|
1098
|
+
def __output_phylip_common(options = {})
|
|
1099
|
+
len = self.alignment_length
|
|
1100
|
+
aln = [ " #{self.number_of_sequences} #{len}\n" ]
|
|
1101
|
+
sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
|
|
1102
|
+
if options[:replace_space]
|
|
1103
|
+
sn.collect! { |x| x.gsub(/\s/, '_') }
|
|
1104
|
+
end
|
|
1105
|
+
if !options.has_key?(:escape) or options[:escape]
|
|
1106
|
+
sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
|
|
1107
|
+
end
|
|
1108
|
+
if !options.has_key?(:split) or options[:split]
|
|
1109
|
+
sn.collect! { |x| x.split(/\s/)[0].to_s }
|
|
1110
|
+
end
|
|
1111
|
+
if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
|
|
1112
|
+
sn = __clustal_avoid_same_name(sn, 10)
|
|
1113
|
+
end
|
|
1114
|
+
|
|
1115
|
+
namewidth = 10
|
|
1116
|
+
seqwidth = (options[:width] or 60)
|
|
1117
|
+
seqwidth = seqwidth.div(10) * 10
|
|
1118
|
+
seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
|
|
1119
|
+
gchar = (options[:gap_char] or '-')
|
|
1120
|
+
|
|
1121
|
+
aseqs = Array.new(self.number_of_sequences).clear
|
|
1122
|
+
self.each_seq do |s|
|
|
1123
|
+
aseqs << s.to_s.gsub(self.gap_regexp, gchar)
|
|
1124
|
+
end
|
|
1125
|
+
case options[:case].to_s
|
|
1126
|
+
when /lower/i
|
|
1127
|
+
aseqs.each { |s| s.downcase! }
|
|
1128
|
+
when /upper/i
|
|
1129
|
+
aseqs.each { |s| s.upcase! }
|
|
1130
|
+
end
|
|
1131
|
+
|
|
1132
|
+
aseqs.collect! do |s|
|
|
1133
|
+
snx = sn.shift
|
|
1134
|
+
head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth]
|
|
1135
|
+
head2 = ' ' * namewidth
|
|
1136
|
+
s << (gchar * (len - s.length))
|
|
1137
|
+
s.gsub!(/(.{1,10})/n, " \\1")
|
|
1138
|
+
s.gsub!(seqregexp, "\\1\n")
|
|
1139
|
+
a = s.split(/^/)
|
|
1140
|
+
head += a.shift
|
|
1141
|
+
ret = a.collect { |x| head2 + x }
|
|
1142
|
+
ret.unshift(head)
|
|
1143
|
+
ret
|
|
1144
|
+
end
|
|
1145
|
+
lines = (len + seqwidth - 1).div(seqwidth)
|
|
1146
|
+
[ aln, aseqs, lines ]
|
|
1147
|
+
end
|
|
1148
|
+
|
|
1149
|
+
# Generates Molphy alignment format text as a string
|
|
1150
|
+
def output_molphy(options = {})
|
|
1151
|
+
len = self.alignment_length
|
|
1152
|
+
header = "#{self.number_of_sequences} #{len}\n"
|
|
1153
|
+
sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
|
|
1154
|
+
if options[:replace_space]
|
|
1155
|
+
sn.collect! { |x| x.gsub(/\s/, '_') }
|
|
1156
|
+
end
|
|
1157
|
+
if !options.has_key?(:escape) or options[:escape]
|
|
1158
|
+
sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
|
|
1159
|
+
end
|
|
1160
|
+
if !options.has_key?(:split) or options[:split]
|
|
1161
|
+
sn.collect! { |x| x.split(/\s/)[0].to_s }
|
|
1162
|
+
end
|
|
1163
|
+
if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
|
|
1164
|
+
sn = __clustal_avoid_same_name(sn, 30)
|
|
1165
|
+
end
|
|
1166
|
+
|
|
1167
|
+
seqwidth = (options[:width] or 60)
|
|
1168
|
+
seqregexp = Regexp.new("(.{1,#{seqwidth}})")
|
|
1169
|
+
gchar = (options[:gap_char] or '-')
|
|
1170
|
+
|
|
1171
|
+
aseqs = Array.new(len).clear
|
|
1172
|
+
self.each_seq do |s|
|
|
1173
|
+
aseqs << s.to_s.gsub(self.gap_regexp, gchar)
|
|
1174
|
+
end
|
|
1175
|
+
case options[:case].to_s
|
|
1176
|
+
when /lower/i
|
|
1177
|
+
aseqs.each { |s| s.downcase! }
|
|
1178
|
+
when /upper/i
|
|
1179
|
+
aseqs.each { |s| s.upcase! }
|
|
1180
|
+
end
|
|
1181
|
+
|
|
1182
|
+
aseqs.collect! do |s|
|
|
1183
|
+
s << (gchar * (len - s.length))
|
|
1184
|
+
s.gsub!(seqregexp, "\\1\n")
|
|
1185
|
+
sn.shift + "\n" + s
|
|
1186
|
+
end
|
|
1187
|
+
aseqs.unshift(header)
|
|
1188
|
+
aseqs.join('')
|
|
1189
|
+
end
|
|
1190
|
+
|
|
1191
|
+
# Generates msf formatted text as a string
|
|
1192
|
+
def output_msf(options = {})
|
|
1193
|
+
len = self.seq_length
|
|
1194
|
+
|
|
1195
|
+
if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
|
|
1196
|
+
sn = __clustal_avoid_same_name(self.sequence_names)
|
|
1197
|
+
else
|
|
1198
|
+
sn = self.sequence_names.collect do |x|
|
|
1199
|
+
x.to_s.gsub(/[\r\n\x00]/, ' ')
|
|
1200
|
+
end
|
|
1201
|
+
end
|
|
1202
|
+
if !options.has_key?(:replace_space) or options[:replace_space]
|
|
1203
|
+
sn.collect! { |x| x.gsub(/\s/, '_') }
|
|
1204
|
+
end
|
|
1205
|
+
if !options.has_key?(:escape) or options[:escape]
|
|
1206
|
+
sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
|
|
1207
|
+
end
|
|
1208
|
+
if !options.has_key?(:split) or options[:split]
|
|
1209
|
+
sn.collect! { |x| x.split(/\s/)[0].to_s }
|
|
1210
|
+
end
|
|
1211
|
+
|
|
1212
|
+
seqwidth = 50
|
|
1213
|
+
namewidth = [31, sn.collect { |x| x.length }.max ].min
|
|
1214
|
+
sep = ' ' * 2
|
|
1215
|
+
|
|
1216
|
+
seqregexp = Regexp.new("(.{1,#{seqwidth}})")
|
|
1217
|
+
gchar = (options[:gap_char] or '.')
|
|
1218
|
+
pchar = (options[:padding_char] or '~')
|
|
1219
|
+
|
|
1220
|
+
aseqs = Array.new(self.number_of_sequences).clear
|
|
1221
|
+
self.each_seq do |s|
|
|
1222
|
+
aseqs << s.to_s.gsub(self.gap_regexp, gchar)
|
|
1223
|
+
end
|
|
1224
|
+
aseqs.each do |s|
|
|
1225
|
+
s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
|
|
1226
|
+
s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
|
|
1227
|
+
s << (pchar * (len - s.length))
|
|
1228
|
+
end
|
|
1229
|
+
|
|
1230
|
+
case options[:case].to_s
|
|
1231
|
+
when /lower/i
|
|
1232
|
+
aseqs.each { |s| s.downcase! }
|
|
1233
|
+
when /upper/i
|
|
1234
|
+
aseqs.each { |s| s.upcase! }
|
|
1235
|
+
else #default upcase
|
|
1236
|
+
aseqs.each { |s| s.upcase! }
|
|
1237
|
+
end
|
|
1238
|
+
|
|
1239
|
+
case options[:type].to_s
|
|
1240
|
+
when /protein/i, /aa/i
|
|
1241
|
+
amino = true
|
|
1242
|
+
when /na/i
|
|
1243
|
+
amino = false
|
|
1244
|
+
else
|
|
1245
|
+
if seqclass == Bio::Sequence::AA then
|
|
1246
|
+
amino = true
|
|
1247
|
+
elsif seqclass == Bio::Sequence::NA then
|
|
1248
|
+
amino = false
|
|
1249
|
+
else
|
|
1250
|
+
# if we can't determine, we assume protein.
|
|
1251
|
+
amino = aseqs.size
|
|
1252
|
+
aseqs.each { |x| amino -= 1 if /\A[acgt]\z/i =~ x }
|
|
1253
|
+
amino = false if amino <= 0
|
|
1254
|
+
end
|
|
1255
|
+
end
|
|
1256
|
+
|
|
1257
|
+
seq_type = (amino ? 'P' : 'N')
|
|
1258
|
+
|
|
1259
|
+
fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf')
|
|
1260
|
+
dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M')
|
|
1261
|
+
|
|
1262
|
+
sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
|
|
1263
|
+
#sums = aseqs.collect { |s| 0 }
|
|
1264
|
+
sum = 0; sums.each { |x| sum += x }; sum %= 10000
|
|
1265
|
+
msf =
|
|
1266
|
+
[
|
|
1267
|
+
"#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
|
|
1268
|
+
"\n",
|
|
1269
|
+
"\n",
|
|
1270
|
+
" #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n",
|
|
1271
|
+
"\n"
|
|
1272
|
+
]
|
|
1273
|
+
|
|
1274
|
+
sn.each do |snx|
|
|
1275
|
+
msf << ' Name: ' +
|
|
1276
|
+
sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
|
|
1277
|
+
" Len: #{len} Check: #{sums.shift} Weight: 1.00\n"
|
|
1278
|
+
end
|
|
1279
|
+
msf << "\n//\n"
|
|
1280
|
+
|
|
1281
|
+
aseqs.collect! do |s|
|
|
1282
|
+
snx = sn.shift
|
|
1283
|
+
head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
|
|
1284
|
+
s.gsub!(seqregexp, "\\1\n")
|
|
1285
|
+
a = s.split(/^/)
|
|
1286
|
+
a.collect { |x| head + x }
|
|
1287
|
+
end
|
|
1288
|
+
lines = (len + seqwidth - 1).div(seqwidth)
|
|
1289
|
+
i = 1
|
|
1290
|
+
lines.times do
|
|
1291
|
+
msf << "\n"
|
|
1292
|
+
n_l = i
|
|
1293
|
+
n_r = [ i + seqwidth - 1, len ].min
|
|
1294
|
+
if n_l != n_r then
|
|
1295
|
+
w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
|
|
1296
|
+
msf << (' ' * namewidth + sep + n_l.to_s +
|
|
1297
|
+
' ' * w + n_r.to_s + "\n")
|
|
1298
|
+
else
|
|
1299
|
+
msf << (' ' * namewidth + sep + n_l.to_s + "\n")
|
|
1300
|
+
end
|
|
1301
|
+
aseqs.each { |a| msf << a.shift }
|
|
1302
|
+
i += seqwidth
|
|
1303
|
+
end
|
|
1304
|
+
msf << "\n"
|
|
1305
|
+
msf.join('')
|
|
1306
|
+
end
|
|
1307
|
+
|
|
1308
|
+
end #module Output
|
|
1309
|
+
|
|
1310
|
+
module EnumerableExtension
|
|
1311
|
+
include Output
|
|
1312
|
+
|
|
1313
|
+
# Returns number of sequences in this alignment.
|
|
1314
|
+
def number_of_sequences
|
|
1315
|
+
i = 0
|
|
1316
|
+
self.each_seq { |s| i += 1 }
|
|
1317
|
+
i
|
|
1318
|
+
end
|
|
1319
|
+
|
|
1320
|
+
# Returns an array of sequence names.
|
|
1321
|
+
# The order of the names must be the same as
|
|
1322
|
+
# the order of <tt>each_seq</tt>.
|
|
1323
|
+
def sequence_names
|
|
1324
|
+
(0...(self.number_of_sequences)).to_a
|
|
1325
|
+
end
|
|
1326
|
+
end #module EnumerableExtension
|
|
1029
1327
|
|
|
1030
1328
|
# Bio::Alignment::ArrayExtension is a set of useful methods for
|
|
1031
1329
|
# multiple sequence alignment.
|
|
@@ -1046,10 +1344,9 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1046
1344
|
each(&block)
|
|
1047
1345
|
end
|
|
1048
1346
|
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
clustalw_formatter(self, (0...(self.size)).to_a, options)
|
|
1347
|
+
# Returns number of sequences in this alignment.
|
|
1348
|
+
def number_of_sequences
|
|
1349
|
+
self.size
|
|
1053
1350
|
end
|
|
1054
1351
|
end #module ArrayExtension
|
|
1055
1352
|
|
|
@@ -1077,8 +1374,9 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1077
1374
|
# Yields a sequence.
|
|
1078
1375
|
#
|
|
1079
1376
|
# It works the same as Hash#each_value.
|
|
1080
|
-
def each_seq
|
|
1081
|
-
each_value(&block)
|
|
1377
|
+
def each_seq #:yields: seq
|
|
1378
|
+
#each_value(&block)
|
|
1379
|
+
each_key { |k| yield self[k] }
|
|
1082
1380
|
end
|
|
1083
1381
|
|
|
1084
1382
|
# Iterates over each sequence and each results running block
|
|
@@ -1140,15 +1438,16 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1140
1438
|
self
|
|
1141
1439
|
end
|
|
1142
1440
|
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1441
|
+
# Returns number of sequences in this alignment.
|
|
1442
|
+
def number_of_sequences
|
|
1443
|
+
self.size
|
|
1444
|
+
end
|
|
1445
|
+
|
|
1446
|
+
# Returns an array of sequence names.
|
|
1447
|
+
# The order of the names must be the same as
|
|
1448
|
+
# the order of <tt>each_seq</tt>.
|
|
1449
|
+
def sequence_names
|
|
1450
|
+
self.keys
|
|
1152
1451
|
end
|
|
1153
1452
|
end #module HashExtension
|
|
1154
1453
|
|
|
@@ -1423,6 +1722,7 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1423
1722
|
#(Hash&Array-like)
|
|
1424
1723
|
@seqs.size
|
|
1425
1724
|
end
|
|
1725
|
+
alias number_of_sequences size
|
|
1426
1726
|
|
|
1427
1727
|
# If the key exists, returns true. Otherwise, returns false.
|
|
1428
1728
|
# (Like Hash#has_key?)
|
|
@@ -1800,7 +2100,7 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1800
2100
|
options = (arg.shift or {})
|
|
1801
2101
|
width = options[:width] unless width
|
|
1802
2102
|
if options[:avoid_same_name] then
|
|
1803
|
-
na =
|
|
2103
|
+
na = __clustal_avoid_same_name(self.keys, 30)
|
|
1804
2104
|
else
|
|
1805
2105
|
na = self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
|
|
1806
2106
|
end
|
|
@@ -1831,17 +2131,15 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1831
2131
|
# Converts to fasta format and returns a string.
|
|
1832
2132
|
#
|
|
1833
2133
|
# The specification of the argument will be changed.
|
|
2134
|
+
#
|
|
2135
|
+
# Note: <tt>to_fasta</tt> is deprecated.
|
|
2136
|
+
# Please use <tt>output_fasta</tt> instead.
|
|
1834
2137
|
def to_fasta(*arg)
|
|
1835
2138
|
#(original)
|
|
2139
|
+
warn "to_fasta is deprecated. Please use output_fasta."
|
|
1836
2140
|
self.to_fasta_array(*arg).join('')
|
|
1837
2141
|
end
|
|
1838
2142
|
|
|
1839
|
-
include ClustalWFormatter
|
|
1840
|
-
# Returns a string of Clustal W formatted text of the alignment.
|
|
1841
|
-
def to_clustal(options = {})
|
|
1842
|
-
clustalw_formatter(self, self.keys, options)
|
|
1843
|
-
end
|
|
1844
|
-
|
|
1845
2143
|
# The method name <tt>consensus</tt> will be obsoleted.
|
|
1846
2144
|
# Please use <tt>consensus_string</tt> instead.
|
|
1847
2145
|
alias consensus consensus_string
|
|
@@ -1900,6 +2198,318 @@ Mix-in for Hash or Hash-like classes.
|
|
|
1900
2198
|
def self.readfiles(*files)
|
|
1901
2199
|
OriginalAlignment.readfiles(*files)
|
|
1902
2200
|
end
|
|
2201
|
+
|
|
2202
|
+
#---
|
|
2203
|
+
# Service classes for multiple alignment applications
|
|
2204
|
+
#+++
|
|
2205
|
+
#---
|
|
2206
|
+
# Templates of alignment application factory
|
|
2207
|
+
#+++
|
|
2208
|
+
|
|
2209
|
+
# Namespace for templates for alignment application factory
|
|
2210
|
+
module FactoryTemplate
|
|
2211
|
+
|
|
2212
|
+
# Template class for alignment application factory.
|
|
2213
|
+
# The program acts:
|
|
2214
|
+
# input: stdin or file, format = fasta format
|
|
2215
|
+
# output: stdout (parser should be specified by DEFAULT_PARSER)
|
|
2216
|
+
class Simple
|
|
2217
|
+
|
|
2218
|
+
# Creates a new alignment factory
|
|
2219
|
+
def initialize(program = self.class::DEFAULT_PROGRAM, options = [])
|
|
2220
|
+
@program = program
|
|
2221
|
+
@options = options
|
|
2222
|
+
@command = nil
|
|
2223
|
+
@output = nil
|
|
2224
|
+
@report = nil
|
|
2225
|
+
@exit_status = nil
|
|
2226
|
+
@data_stdout = nil
|
|
2227
|
+
end
|
|
2228
|
+
|
|
2229
|
+
# program name
|
|
2230
|
+
attr_accessor :program
|
|
2231
|
+
|
|
2232
|
+
# options
|
|
2233
|
+
attr_accessor :options
|
|
2234
|
+
|
|
2235
|
+
# Last command-line string. Returns nil or an array of String.
|
|
2236
|
+
# Note that filenames described in the command-line may already
|
|
2237
|
+
# be removed because these files may be temporary files.
|
|
2238
|
+
attr_reader :command
|
|
2239
|
+
|
|
2240
|
+
# Last raw result of the program.
|
|
2241
|
+
# Return a string (or nil).
|
|
2242
|
+
attr_reader :output
|
|
2243
|
+
|
|
2244
|
+
# Last result object produced by the factory.
|
|
2245
|
+
attr_reader :report
|
|
2246
|
+
|
|
2247
|
+
# Last exit status
|
|
2248
|
+
attr_reader :exit_status
|
|
2249
|
+
|
|
2250
|
+
# Last output to the stdout.
|
|
2251
|
+
attr_accessor :data_stdout
|
|
2252
|
+
|
|
2253
|
+
# Clear the internal data and status, except program and options.
|
|
2254
|
+
def reset
|
|
2255
|
+
@command = nil
|
|
2256
|
+
@output = nil
|
|
2257
|
+
@report = nil
|
|
2258
|
+
@exit_status = nil
|
|
2259
|
+
@data_stdout = nil
|
|
2260
|
+
end
|
|
2261
|
+
|
|
2262
|
+
# Executes the program.
|
|
2263
|
+
# If +seqs+ is not nil, perform alignment for seqs.
|
|
2264
|
+
# If +seqs+ is nil, simply executes the program.
|
|
2265
|
+
#
|
|
2266
|
+
# Compatibility note: When seqs is nil,
|
|
2267
|
+
# returns true if the program exits normally, and
|
|
2268
|
+
# returns false if the program exits abnormally.
|
|
2269
|
+
def query(seqs)
|
|
2270
|
+
if seqs then
|
|
2271
|
+
query_alignment(seqs)
|
|
2272
|
+
else
|
|
2273
|
+
exec_local(@options)
|
|
2274
|
+
@exit_status.exitstatus == 0 ? true : false
|
|
2275
|
+
end
|
|
2276
|
+
end
|
|
2277
|
+
|
|
2278
|
+
# Performs alignment for seqs.
|
|
2279
|
+
# +seqs+ should be Bio::Alignment or Array of sequences or nil.
|
|
2280
|
+
def query_alignment(seqs)
|
|
2281
|
+
unless seqs.respond_to?(:output_fasta) then
|
|
2282
|
+
seqs = Bio::Alignment.new(seqs)
|
|
2283
|
+
end
|
|
2284
|
+
query_string(seqs.output_fasta(:width => 70))
|
|
2285
|
+
end
|
|
2286
|
+
|
|
2287
|
+
# alias of query_alignment.
|
|
2288
|
+
#
|
|
2289
|
+
# Compatibility Note: query_align will be renamed to query_alignment.
|
|
2290
|
+
def query_align(seqs)
|
|
2291
|
+
#warn 'query_align is renamed to query_alignment.'
|
|
2292
|
+
query_alignment(seqs)
|
|
2293
|
+
end
|
|
2294
|
+
|
|
2295
|
+
# Performs alignment for +str+.
|
|
2296
|
+
# The +str+ should be a string that can be recognized by the program.
|
|
2297
|
+
def query_string(str)
|
|
2298
|
+
_query_string(str, @options)
|
|
2299
|
+
@report
|
|
2300
|
+
end
|
|
2301
|
+
|
|
2302
|
+
# Performs alignment of sequences in the file named +filename_in+.
|
|
2303
|
+
def query_by_filename(filename_in)
|
|
2304
|
+
_query_local(filename_in, @options)
|
|
2305
|
+
@report
|
|
2306
|
+
end
|
|
2307
|
+
|
|
2308
|
+
private
|
|
2309
|
+
# Executes a program in the local machine.
|
|
2310
|
+
def exec_local(opt, data_stdin = nil)
|
|
2311
|
+
@exit_status = nil
|
|
2312
|
+
@command = [ @program, *opt ]
|
|
2313
|
+
#STDERR.print "DEBUG: ", @command.join(" "), "\n"
|
|
2314
|
+
@data_stdout = Bio::Command.query_command(@command, data_stdin)
|
|
2315
|
+
@exit_status = $?
|
|
2316
|
+
end
|
|
2317
|
+
|
|
2318
|
+
# prepare temporary file
|
|
2319
|
+
def _prepare_tempfile(str = nil)
|
|
2320
|
+
tf_in = Tempfile.open(str ? 'alignment_i' :'alignment_o')
|
|
2321
|
+
tf_in.print str if str
|
|
2322
|
+
tf_in.close(false)
|
|
2323
|
+
tf_in
|
|
2324
|
+
end
|
|
2325
|
+
|
|
2326
|
+
# generates options specifying input/output filename.
|
|
2327
|
+
# nil for filename means stdin or stdout.
|
|
2328
|
+
# +options+ must not contain options specifying filenames.
|
|
2329
|
+
# returns an array of string.
|
|
2330
|
+
def _generate_options(infile, outfile, options)
|
|
2331
|
+
options +
|
|
2332
|
+
(infile ? _option_input_file(infile) : _option_input_stdin) +
|
|
2333
|
+
(outfile ? _option_output_file(outfile) : _option_output_stdout)
|
|
2334
|
+
end
|
|
2335
|
+
|
|
2336
|
+
# generates options specifying input filename.
|
|
2337
|
+
# returns an array of string
|
|
2338
|
+
def _option_input_file(fn)
|
|
2339
|
+
[ fn ]
|
|
2340
|
+
end
|
|
2341
|
+
|
|
2342
|
+
# generates options specifying output filename.
|
|
2343
|
+
# returns an array of string
|
|
2344
|
+
def _option_output_file(fn)
|
|
2345
|
+
raise 'can not specify output file: always stdout'
|
|
2346
|
+
end
|
|
2347
|
+
|
|
2348
|
+
# generates options specifying that input is taken from stdin.
|
|
2349
|
+
# returns an array of string
|
|
2350
|
+
def _option_input_stdin
|
|
2351
|
+
[]
|
|
2352
|
+
end
|
|
2353
|
+
|
|
2354
|
+
# generates options specifying output to stdout.
|
|
2355
|
+
# returns an array of string
|
|
2356
|
+
def _option_output_stdout
|
|
2357
|
+
[]
|
|
2358
|
+
end
|
|
2359
|
+
end #class Simple
|
|
2360
|
+
|
|
2361
|
+
# mix-in module
|
|
2362
|
+
module WrapInputStdin
|
|
2363
|
+
private
|
|
2364
|
+
# Performs alignment for +str+.
|
|
2365
|
+
# The +str+ should be a string that can be recognized by the program.
|
|
2366
|
+
def _query_string(str, opt)
|
|
2367
|
+
_query_local(nil, opt, str)
|
|
2368
|
+
end
|
|
2369
|
+
end #module WrapInputStdin
|
|
2370
|
+
|
|
2371
|
+
# mix-in module
|
|
2372
|
+
module WrapInputTempfile
|
|
2373
|
+
private
|
|
2374
|
+
# Performs alignment for +str+.
|
|
2375
|
+
# The +str+ should be a string that can be recognized by the program.
|
|
2376
|
+
def _query_string(str, opt)
|
|
2377
|
+
begin
|
|
2378
|
+
tf_in = _prepare_tempfile(str)
|
|
2379
|
+
ret = _query_local(tf_in.path, opt, nil)
|
|
2380
|
+
ensure
|
|
2381
|
+
tf_in.close(true) if tf_in
|
|
2382
|
+
end
|
|
2383
|
+
ret
|
|
2384
|
+
end
|
|
2385
|
+
end #module WrapInputTempfile
|
|
2386
|
+
|
|
2387
|
+
# mix-in module
|
|
2388
|
+
module WrapOutputStdout
|
|
2389
|
+
private
|
|
2390
|
+
# Performs alignment by specified filenames
|
|
2391
|
+
def _query_local(fn_in, opt, data_stdin = nil)
|
|
2392
|
+
opt = _generate_options(fn_in, nil, opt)
|
|
2393
|
+
exec_local(opt, data_stdin)
|
|
2394
|
+
@output = @data_stdout
|
|
2395
|
+
@report = self.class::DEFAULT_PARSER.new(@output)
|
|
2396
|
+
@report
|
|
2397
|
+
end
|
|
2398
|
+
end #module WrapOutputStdout
|
|
2399
|
+
|
|
2400
|
+
# mix-in module
|
|
2401
|
+
module WrapOutputTempfile
|
|
2402
|
+
private
|
|
2403
|
+
# Performs alignment
|
|
2404
|
+
def _query_local(fn_in, opt, data_stdin = nil)
|
|
2405
|
+
begin
|
|
2406
|
+
tf_out = _prepare_tempfile()
|
|
2407
|
+
opt = _generate_options(fn_in, tf_out.path, opt)
|
|
2408
|
+
exec_local(opt, data_stdin)
|
|
2409
|
+
tf_out.open
|
|
2410
|
+
@output = tf_out.read
|
|
2411
|
+
ensure
|
|
2412
|
+
tf_out.close(true) if tf_out
|
|
2413
|
+
end
|
|
2414
|
+
@report = self.class::DEFAULT_PARSER.new(@output)
|
|
2415
|
+
@report
|
|
2416
|
+
end
|
|
2417
|
+
end #module WrapOutputTempfile
|
|
2418
|
+
|
|
2419
|
+
# Template class for alignment application factory.
|
|
2420
|
+
# The program needs:
|
|
2421
|
+
# input: file (cannot accept stdin), format = fasta format
|
|
2422
|
+
# output: stdout (parser should be specified by DEFAULT_PARSER)
|
|
2423
|
+
class FileInStdoutOut < Simple
|
|
2424
|
+
include Bio::Alignment::FactoryTemplate::WrapInputTempfile
|
|
2425
|
+
include Bio::Alignment::FactoryTemplate::WrapOutputStdout
|
|
2426
|
+
|
|
2427
|
+
private
|
|
2428
|
+
# generates options specifying that input is taken from stdin.
|
|
2429
|
+
# returns an array of string
|
|
2430
|
+
def _option_input_stdin
|
|
2431
|
+
raise 'input is always a file'
|
|
2432
|
+
end
|
|
2433
|
+
end #class FileInStdoutOut
|
|
2434
|
+
|
|
2435
|
+
# Template class for alignment application factory.
|
|
2436
|
+
# The program needs:
|
|
2437
|
+
# input: stdin or file, format = fasta format
|
|
2438
|
+
# output: file (parser should be specified by DEFAULT_PARSER)
|
|
2439
|
+
class StdinInFileOut < Simple
|
|
2440
|
+
include Bio::Alignment::FactoryTemplate::WrapInputStdin
|
|
2441
|
+
include Bio::Alignment::FactoryTemplate::WrapOutputTempfile
|
|
2442
|
+
|
|
2443
|
+
private
|
|
2444
|
+
# generates options specifying output to stdout.
|
|
2445
|
+
# returns an array of string
|
|
2446
|
+
def _option_output_stdout
|
|
2447
|
+
raise 'output is always a file'
|
|
2448
|
+
end
|
|
2449
|
+
end #class StdinInFileOut
|
|
2450
|
+
|
|
2451
|
+
# Template class for alignment application factory.
|
|
2452
|
+
# The program needs:
|
|
2453
|
+
# input: file (cannot accept stdin), format = fasta format
|
|
2454
|
+
# output: file (parser should be specified by DEFAULT_PARSER)
|
|
2455
|
+
class FileInFileOut < Simple
|
|
2456
|
+
include Bio::Alignment::FactoryTemplate::WrapInputTempfile
|
|
2457
|
+
include Bio::Alignment::FactoryTemplate::WrapOutputTempfile
|
|
2458
|
+
|
|
2459
|
+
private
|
|
2460
|
+
# generates options specifying that input is taken from stdin.
|
|
2461
|
+
# returns an array of string
|
|
2462
|
+
def _option_input_stdin
|
|
2463
|
+
raise 'input is always a file'
|
|
2464
|
+
end
|
|
2465
|
+
|
|
2466
|
+
# generates options specifying output to stdout.
|
|
2467
|
+
# returns an array of string
|
|
2468
|
+
def _option_output_stdout
|
|
2469
|
+
raise 'output is always a file'
|
|
2470
|
+
end
|
|
2471
|
+
end #class FileInFileOut
|
|
2472
|
+
|
|
2473
|
+
# Template class for alignment application factory.
|
|
2474
|
+
# The program needs:
|
|
2475
|
+
# input: file (cannot accept stdin), format = fasta format
|
|
2476
|
+
# output: file (parser should be specified by DEFAULT_PARSER)
|
|
2477
|
+
# Tree (*.dnd) output is also supported.
|
|
2478
|
+
class FileInFileOutWithTree < FileInFileOut
|
|
2479
|
+
|
|
2480
|
+
# alignment guide tree generated by the program (*.dnd file)
|
|
2481
|
+
attr_reader :output_dnd
|
|
2482
|
+
|
|
2483
|
+
def reset
|
|
2484
|
+
@output_dnd = nil
|
|
2485
|
+
super
|
|
2486
|
+
end
|
|
2487
|
+
|
|
2488
|
+
private
|
|
2489
|
+
# Performs alignment
|
|
2490
|
+
def _query_local(fn_in, opt, data_stdin = nil)
|
|
2491
|
+
begin
|
|
2492
|
+
tf_dnd = _prepare_tempfile()
|
|
2493
|
+
opt = opt + _option_output_dndfile(tf_dnd.path)
|
|
2494
|
+
ret = super(fn_in, opt, data_stdin)
|
|
2495
|
+
tf_dnd.open
|
|
2496
|
+
@output_dnd = tf_dnd.read
|
|
2497
|
+
ensure
|
|
2498
|
+
tf_dnd.close(true) if tf_dnd
|
|
2499
|
+
end
|
|
2500
|
+
ret
|
|
2501
|
+
end
|
|
2502
|
+
|
|
2503
|
+
# generates options specifying output tree file (*.dnd).
|
|
2504
|
+
# returns an array of string
|
|
2505
|
+
def _option_output_dndfile
|
|
2506
|
+
raise NotImplementedError
|
|
2507
|
+
end
|
|
2508
|
+
end #class FileInFileOutWithTree
|
|
2509
|
+
|
|
2510
|
+
end #module FactoryTemplate
|
|
2511
|
+
|
|
2512
|
+
|
|
1903
2513
|
end #module Alignment
|
|
1904
2514
|
|
|
1905
2515
|
end #module Bio
|