bio 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
data/doc/Tutorial.rd
CHANGED
@@ -2,16 +2,13 @@
|
|
2
2
|
|
3
3
|
See the document in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)) - for a potentially more up-to-date edition. This one was updated:
|
4
4
|
|
5
|
-
$Id: Tutorial.rd,v 1.
|
5
|
+
$Id: Tutorial.rd,v 1.13 2007/07/09 12:28:07 pjotr Exp $
|
6
6
|
|
7
7
|
Translated into English: Naohisa Goto <ng@bioruby.org>
|
8
8
|
|
9
9
|
Editor: PjotrPrins <p@bioruby.org>
|
10
10
|
|
11
|
-
Copyright (C) 2001-2003 KATAYAMA Toshiaki <k@bioruby.org>, 2005-
|
12
|
-
others
|
13
|
-
|
14
|
-
NOTE: This page is a work in progress at this point
|
11
|
+
Copyright (C) 2001-2003 KATAYAMA Toshiaki <k@bioruby.org>, 2005-2007 Pjotr Prins, Naohisa Goto and others
|
15
12
|
|
16
13
|
IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
|
17
14
|
repository. Please edit the file there otherwise changes may get
|
@@ -38,7 +35,7 @@ version it has with the
|
|
38
35
|
|
39
36
|
command. Showing something like:
|
40
37
|
|
41
|
-
ruby 1.8.
|
38
|
+
ruby 1.8.5 (2006-08-25) [powerpc-linux]
|
42
39
|
|
43
40
|
|
44
41
|
== Trying Bioruby
|
@@ -96,6 +93,9 @@ defined in codontable.rb).
|
|
96
93
|
|
97
94
|
puts seq.complement.translate # translation of complemental strand
|
98
95
|
|
96
|
+
counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
|
97
|
+
p randomseq = Bio::Sequence::NA.randomize(counts) # reshuffle sequence with same freq.
|
98
|
+
|
99
99
|
The p, print and puts methods are standard Ruby ways of outputting to
|
100
100
|
the screen. If you want to know more about standard Ruby commands you
|
101
101
|
can use the 'ri' command on the command line (or the help command in
|
@@ -462,6 +462,40 @@ Array and BioPerl's Bio::SimpleAlign. A very simple example is:
|
|
462
462
|
factory = Bio::ClustalW.new
|
463
463
|
a2 = a.do_align(factory)
|
464
464
|
|
465
|
+
== Restriction Enzymes (Bio::RE)
|
466
|
+
|
467
|
+
BioRuby has extensive support for restriction enzymes (REs). It contains a full
|
468
|
+
library of commonly used REs (from REBASE) which can be used to cut single
|
469
|
+
stranded RNA or double stranded DNA into fragments. To list all enzymes:
|
470
|
+
|
471
|
+
rebase = Bio::RestrictionEnzyme.rebase
|
472
|
+
rebase.each do |enzyme_name, info|
|
473
|
+
p enzyme_name
|
474
|
+
end
|
475
|
+
|
476
|
+
To cut a sequence with an enzyme, follow up with:
|
477
|
+
|
478
|
+
res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0}, {:view_ranges => true})
|
479
|
+
if res.kind_of? Symbol #error
|
480
|
+
err = Err.find_by_code(res.to_s)
|
481
|
+
unless err
|
482
|
+
err = Err.new(:code => res.to_s)
|
483
|
+
end
|
484
|
+
end
|
485
|
+
res.each do |frag|
|
486
|
+
em = EnzymeMatch.new
|
487
|
+
|
488
|
+
em.p_left = frag.p_left
|
489
|
+
em.p_right = frag.p_right
|
490
|
+
em.c_left = frag.c_left
|
491
|
+
em.c_right = frag.c_right
|
492
|
+
|
493
|
+
em.err = nil
|
494
|
+
em.enzyme = ar_enz
|
495
|
+
em.sequence = ar_seq
|
496
|
+
p em
|
497
|
+
end
|
498
|
+
|
465
499
|
|
466
500
|
== Sequence homology search by using the FASTA program (Bio::Fasta)
|
467
501
|
|
@@ -1123,14 +1157,17 @@ Please refer to KEGG_API.rd.ja (TRANSLATOR'S NOTE: English version: ((<URL:http:
|
|
1123
1157
|
|
1124
1158
|
* ((<URL:http://www.genome.jp/kegg/soap/>))
|
1125
1159
|
|
1160
|
+
== Comparing BioProjects
|
1161
|
+
|
1162
|
+
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
1163
|
+
|
1126
1164
|
== Using BioRuby with R
|
1127
1165
|
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
this point you'll have to create some command line interface.
|
1166
|
+
On using Ruby with R, Pjotr wrote a section on SciRuby. See ((<URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang>))
|
1167
|
+
|
1168
|
+
== Using BioPerl or BioPython from Ruby
|
1132
1169
|
|
1133
|
-
|
1170
|
+
At the moment there is no easy way of accessing BioPerl from Ruby. The best way, perhaps, is to create a Perl server that gets accessed through XML/RPC or SOAP.
|
1134
1171
|
|
1135
1172
|
== Installing required external library
|
1136
1173
|
|
data/lib/bio.rb
CHANGED
@@ -3,14 +3,14 @@
|
|
3
3
|
#
|
4
4
|
# Copyright:: Copyright (C) 2001-2006
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
|
-
# License:: Ruby
|
6
|
+
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: bio.rb,v 1.
|
8
|
+
# $Id: bio.rb,v 1.86 2007/07/16 12:26:28 ngoto Exp $
|
9
9
|
#
|
10
10
|
|
11
11
|
module Bio
|
12
12
|
|
13
|
-
BIORUBY_VERSION = [1,
|
13
|
+
BIORUBY_VERSION = [1, 1, 0].extend(Comparable)
|
14
14
|
|
15
15
|
### Basic data types
|
16
16
|
|
@@ -42,7 +42,12 @@ module Bio
|
|
42
42
|
|
43
43
|
autoload :Alignment, 'bio/alignment'
|
44
44
|
|
45
|
+
## Tree
|
46
|
+
autoload :Tree, 'bio/tree'
|
45
47
|
|
48
|
+
## Map
|
49
|
+
autoload :Map, 'bio/map'
|
50
|
+
|
46
51
|
### Constants
|
47
52
|
|
48
53
|
autoload :NucleicAcid, 'bio/data/na'
|
@@ -83,14 +88,15 @@ module Bio
|
|
83
88
|
autoload :GENES, 'bio/db/kegg/genes'
|
84
89
|
autoload :ENZYME, 'bio/db/kegg/enzyme'
|
85
90
|
autoload :COMPOUND, 'bio/db/kegg/compound'
|
91
|
+
autoload :DRUG, 'bio/db/kegg/drug'
|
86
92
|
autoload :GLYCAN, 'bio/db/kegg/glycan'
|
87
93
|
autoload :REACTION, 'bio/db/kegg/reaction'
|
88
94
|
autoload :BRITE, 'bio/db/kegg/brite'
|
89
95
|
autoload :CELL, 'bio/db/kegg/cell'
|
90
96
|
autoload :EXPRESSION, 'bio/db/kegg/expression'
|
91
|
-
autoload :
|
92
|
-
autoload :KO, 'bio/db/kegg/ko'
|
97
|
+
autoload :ORTHOLOGY, 'bio/db/kegg/orthology'
|
93
98
|
autoload :KGML, 'bio/db/kegg/kgml'
|
99
|
+
autoload :Taxonomy, 'bio/db/kegg/taxonomy'
|
94
100
|
end
|
95
101
|
|
96
102
|
## other formats
|
@@ -110,13 +116,19 @@ module Bio
|
|
110
116
|
autoload :GO, 'bio/db/go'
|
111
117
|
autoload :PDB, 'bio/db/pdb'
|
112
118
|
autoload :NBRF, 'bio/db/nbrf'
|
119
|
+
autoload :REBASE, 'bio/db/rebase'
|
120
|
+
autoload :SOFT, 'bio/db/soft'
|
121
|
+
autoload :Lasergene, 'bio/db/lasergene'
|
113
122
|
|
123
|
+
autoload :Newick, 'bio/db/newick'
|
124
|
+
autoload :Nexus, 'bio/db/nexus'
|
114
125
|
|
115
126
|
### IO interface modules
|
116
127
|
|
117
128
|
autoload :Registry, 'bio/io/registry'
|
118
129
|
autoload :Fetch, 'bio/io/fetch'
|
119
130
|
autoload :SQL, 'bio/io/sql'
|
131
|
+
autoload :SOAPWSDL, 'bio/io/soapwsdl'
|
120
132
|
autoload :FlatFile, 'bio/io/flatfile'
|
121
133
|
autoload :FlatFileIndex, 'bio/io/flatfile/index' # chage to FlatFile::Index ?
|
122
134
|
## below are described in bio/io/flatfile/index.rb
|
@@ -131,6 +143,8 @@ module Bio
|
|
131
143
|
autoload :DAS, 'bio/io/das'
|
132
144
|
autoload :DBGET, 'bio/io/dbget'
|
133
145
|
|
146
|
+
autoload :Ensembl, 'bio/io/ensembl'
|
147
|
+
|
134
148
|
## below are described in bio/appl/blast.rb
|
135
149
|
#class Blast
|
136
150
|
# autoload :Fastacmd, 'bio/io/fastacmd'
|
@@ -149,7 +163,13 @@ module Bio
|
|
149
163
|
autoload :HiGet, 'bio/io/higet'
|
150
164
|
end
|
151
165
|
|
152
|
-
|
166
|
+
class EBI
|
167
|
+
autoload :SOAP, 'bio/io/ebisoap'
|
168
|
+
end
|
169
|
+
|
170
|
+
class NCBI
|
171
|
+
autoload :SOAP, 'bio/io/ncbisoap'
|
172
|
+
end
|
153
173
|
|
154
174
|
|
155
175
|
### Applications
|
@@ -206,6 +226,10 @@ module Bio
|
|
206
226
|
# autoload :Report, 'bio/appl/mafft/report'
|
207
227
|
#end
|
208
228
|
|
229
|
+
autoload :Tcoffee, 'bio/appl/tcoffee'
|
230
|
+
autoload :Muscle, 'bio/appl/muscle'
|
231
|
+
autoload :Probcons, 'bio/appl/probcons'
|
232
|
+
|
209
233
|
autoload :Sim4, 'bio/appl/sim4'
|
210
234
|
## below are described in bio/appl/sim4.rb
|
211
235
|
#class Sim4
|
@@ -215,11 +239,40 @@ module Bio
|
|
215
239
|
autoload :Spidey, 'bio/appl/spidey/report'
|
216
240
|
autoload :Blat, 'bio/appl/blat/report'
|
217
241
|
|
242
|
+
module GCG
|
243
|
+
autoload :Msf, 'bio/appl/gcg/msf'
|
244
|
+
autoload :Seq, 'bio/appl/gcg/seq'
|
245
|
+
end
|
246
|
+
|
247
|
+
module Phylip
|
248
|
+
autoload :PhylipFormat, 'bio/appl/phylip/alignment'
|
249
|
+
autoload :DistanceMatrix, 'bio/appl/phylip/distance_matrix'
|
250
|
+
end
|
251
|
+
|
252
|
+
autoload :Iprscan, 'bio/appl/iprscan/report'
|
218
253
|
|
219
254
|
### Utilities
|
220
255
|
|
221
256
|
autoload :SiRNA, 'bio/util/sirna'
|
222
257
|
autoload :ColorScheme, 'bio/util/color_scheme'
|
258
|
+
autoload :ContingencyTable, 'bio/util/contingency_table'
|
259
|
+
autoload :RestrictionEnzyme, 'bio/util/restriction_enzyme'
|
260
|
+
|
261
|
+
### Service libraries
|
262
|
+
autoload :Command, 'bio/command'
|
263
|
+
|
264
|
+
### Provide BioRuby shell 'command' also as 'Bio.command' (like ChemRuby)
|
265
|
+
|
266
|
+
def self.method_missing(*args)
|
267
|
+
require 'bio/shell'
|
268
|
+
extend Bio::Shell
|
269
|
+
public_class_method(*Bio::Shell.private_instance_methods)
|
270
|
+
if Bio.respond_to?(args.first)
|
271
|
+
Bio.send(*args)
|
272
|
+
else
|
273
|
+
raise NameError
|
274
|
+
end
|
275
|
+
end
|
223
276
|
|
224
277
|
end
|
225
278
|
|
data/lib/bio/alignment.rb
CHANGED
@@ -1,28 +1,12 @@
|
|
1
1
|
#
|
2
2
|
# = bio/alignment.rb - multiple alignment of sequences
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2003, 2005
|
5
|
-
# GOTO Naohisa <
|
4
|
+
# Copyright:: Copyright (C) 2003, 2005, 2006
|
5
|
+
# GOTO Naohisa <ng@bioruby.org>
|
6
6
|
#
|
7
|
-
# License::
|
7
|
+
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id: alignment.rb,v 1.
|
10
|
-
#
|
11
|
-
#--
|
12
|
-
# This library is free software; you can redistribute it and/or
|
13
|
-
# modify it under the terms of the GNU Lesser General Public
|
14
|
-
# License as published by the Free Software Foundation; either
|
15
|
-
# version 2 of the License, or (at your option) any later version.
|
16
|
-
#
|
17
|
-
# This library is distributed in the hope that it will be useful,
|
18
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
19
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
20
|
-
# Lesser General Public License for more details.
|
21
|
-
#
|
22
|
-
# You should have received a copy of the GNU Lesser General Public
|
23
|
-
# License along with this library; if not, write to the Free Software
|
24
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
25
|
-
#++
|
9
|
+
# $Id: alignment.rb,v 1.23 2007/07/16 12:21:39 ngoto Exp $
|
26
10
|
#
|
27
11
|
# = About Bio::Alignment
|
28
12
|
#
|
@@ -37,52 +21,59 @@
|
|
37
21
|
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
|
38
22
|
#
|
39
23
|
|
24
|
+
require 'tempfile'
|
25
|
+
require 'bio/command'
|
40
26
|
require 'bio/sequence'
|
41
27
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
= About Bio::Alignment
|
47
|
-
|
48
|
-
Bio::Alignment is a namespace of classes/modules for multiple sequence
|
49
|
-
alignment.
|
50
|
-
|
51
|
-
= Multiple alignment container classes
|
52
|
-
|
53
|
-
== Bio::Alignment::OriginalAlignment
|
54
|
-
|
55
|
-
== Bio::Alignment::SequenceArray
|
56
|
-
|
57
|
-
== Bio::Alignment::SequenceHash
|
58
|
-
|
59
|
-
= Bio::Alignment::Site
|
60
|
-
|
61
|
-
= Modules
|
62
|
-
|
63
|
-
== Bio::Alignment::EnumerableExtension
|
64
|
-
|
65
|
-
Mix-in for classes included Enumerable.
|
66
|
-
|
67
|
-
== Bio::Alignment::ArrayExtension
|
68
|
-
|
69
|
-
Mix-in for Array or Array-like classes.
|
28
|
+
#---
|
29
|
+
# (depends on autoload)
|
30
|
+
#require 'bio/appl/gcg/seq'
|
31
|
+
#+++
|
70
32
|
|
71
|
-
|
72
|
-
|
73
|
-
Mix-in for Hash or Hash-like classes.
|
74
|
-
|
75
|
-
== Bio::Alignment::SiteMethods
|
76
|
-
|
77
|
-
== Bio::Alignment::PropertyMethods
|
78
|
-
|
79
|
-
= Bio::Alignment::GAP
|
80
|
-
|
81
|
-
= Compatibility from older BioRuby
|
33
|
+
module Bio
|
82
34
|
|
83
|
-
|
35
|
+
#
|
36
|
+
# = About Bio::Alignment
|
37
|
+
#
|
38
|
+
# Bio::Alignment is a namespace of classes/modules for multiple sequence
|
39
|
+
# alignment.
|
40
|
+
#
|
41
|
+
# = Multiple alignment container classes
|
42
|
+
#
|
43
|
+
# == Bio::Alignment::OriginalAlignment
|
44
|
+
#
|
45
|
+
# == Bio::Alignment::SequenceArray
|
46
|
+
#
|
47
|
+
# == Bio::Alignment::SequenceHash
|
48
|
+
#
|
49
|
+
# = Bio::Alignment::Site
|
50
|
+
#
|
51
|
+
# = Modules
|
52
|
+
#
|
53
|
+
# == Bio::Alignment::EnumerableExtension
|
54
|
+
#
|
55
|
+
# Mix-in for classes included Enumerable.
|
56
|
+
#
|
57
|
+
# == Bio::Alignment::ArrayExtension
|
58
|
+
#
|
59
|
+
# Mix-in for Array or Array-like classes.
|
60
|
+
#
|
61
|
+
# == Bio::Alignment::HashExtension
|
62
|
+
#
|
63
|
+
# Mix-in for Hash or Hash-like classes.
|
64
|
+
#
|
65
|
+
# == Bio::Alignment::SiteMethods
|
66
|
+
#
|
67
|
+
# == Bio::Alignment::PropertyMethods
|
68
|
+
#
|
69
|
+
# = Bio::Alignment::GAP
|
70
|
+
#
|
71
|
+
# = Compatibility from older BioRuby
|
72
|
+
#
|
84
73
|
module Alignment
|
85
74
|
|
75
|
+
autoload :MultiFastaFormat, 'bio/appl/mafft/report'
|
76
|
+
|
86
77
|
# Bio::Alignment::PropertyMethods is a set of methods to treat
|
87
78
|
# the gap character and so on.
|
88
79
|
module PropertyMethods
|
@@ -640,10 +631,14 @@ Mix-in for Hash or Hash-like classes.
|
|
640
631
|
amino = true
|
641
632
|
elsif seqclass == Bio::Sequence::NA then
|
642
633
|
amino = false
|
643
|
-
elsif self.find { |x| /[EFILPQ]/i =~ x } then
|
644
|
-
amino = true
|
645
634
|
else
|
646
635
|
amino = nil
|
636
|
+
self.each_seq do |x|
|
637
|
+
if /[EFILPQ]/i =~ x
|
638
|
+
amino = true
|
639
|
+
break
|
640
|
+
end
|
641
|
+
end
|
647
642
|
end
|
648
643
|
end
|
649
644
|
if amino then
|
@@ -873,16 +868,31 @@ Mix-in for Hash or Hash-like classes.
|
|
873
868
|
end
|
874
869
|
end #module EnumerableExtension
|
875
870
|
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
871
|
+
module Output
|
872
|
+
def output(format, *arg)
|
873
|
+
case format
|
874
|
+
when :clustal
|
875
|
+
output_clustal(*arg)
|
876
|
+
when :fasta
|
877
|
+
output_fasta(*arg)
|
878
|
+
when :phylip
|
879
|
+
output_phylip(*arg)
|
880
|
+
when :phylipnon
|
881
|
+
output_phylipnon(*arg)
|
882
|
+
when :msf
|
883
|
+
output_msf(*arg)
|
884
|
+
when :molphy
|
885
|
+
output_molphy(*arg)
|
886
|
+
else
|
887
|
+
raise "Unknown format: #{format.inspect}"
|
888
|
+
end
|
889
|
+
end
|
890
|
+
|
891
|
+
# Check whether there are same names for ClustalW format.
|
882
892
|
#
|
883
893
|
# array:: names of the sequences (array of string)
|
884
894
|
# len:: length to check (default:30)
|
885
|
-
def
|
895
|
+
def __clustal_have_same_name?(array, len = 30)
|
886
896
|
na30 = array.collect do |k|
|
887
897
|
k.to_s.split(/[\x00\s]/)[0].to_s[0, len].gsub(/\:\;\,\(\)/, '_').to_s
|
888
898
|
end
|
@@ -909,15 +919,16 @@ Mix-in for Hash or Hash-like classes.
|
|
909
919
|
false
|
910
920
|
end
|
911
921
|
end
|
912
|
-
private :
|
922
|
+
private :__clustal_have_same_name?
|
913
923
|
|
914
|
-
# Changes sequence names if there are conflicted names
|
924
|
+
# Changes sequence names if there are conflicted names
|
925
|
+
# for ClustalW format.
|
915
926
|
#
|
916
927
|
# array:: names of the sequences (array of string)
|
917
928
|
# len:: length to check (default:30)
|
918
|
-
def
|
929
|
+
def __clustal_avoid_same_name(array, len = 30)
|
919
930
|
na = array.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
|
920
|
-
if dupidx =
|
931
|
+
if dupidx = __clustal_have_same_name?(na, len)
|
921
932
|
procs = [
|
922
933
|
Proc.new { |s, i|
|
923
934
|
s[0, len].to_s.gsub(/\s/, '_') + s[len..-1].to_s
|
@@ -931,7 +942,7 @@ Mix-in for Hash or Hash-like classes.
|
|
931
942
|
s = array[i]
|
932
943
|
na[i] = pr.call(s.to_s, i)
|
933
944
|
end
|
934
|
-
dupidx =
|
945
|
+
dupidx = __clustal_have_same_name?(na, len)
|
935
946
|
break unless dupidx
|
936
947
|
end
|
937
948
|
if dupidx then
|
@@ -942,13 +953,13 @@ Mix-in for Hash or Hash-like classes.
|
|
942
953
|
end
|
943
954
|
na
|
944
955
|
end
|
945
|
-
private :
|
956
|
+
private :__clustal_avoid_same_name
|
946
957
|
|
947
958
|
# Generates ClustalW-formatted text
|
948
959
|
# seqs:: sequences (must be an alignment object)
|
949
960
|
# names:: names of the sequences
|
950
961
|
# options:: options
|
951
|
-
def
|
962
|
+
def __clustal_formatter(seqs, names, options = {})
|
952
963
|
#(original)
|
953
964
|
aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
|
954
965
|
len = seqs.seq_length
|
@@ -963,7 +974,7 @@ Mix-in for Hash or Hash-like classes.
|
|
963
974
|
sn.collect! { |x| x.split(/\s/)[0].to_s }
|
964
975
|
end
|
965
976
|
if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
|
966
|
-
sn =
|
977
|
+
sn = __clustal_avoid_same_name(sn)
|
967
978
|
end
|
968
979
|
|
969
980
|
if sn.find { |x| x.length > 10 } then
|
@@ -988,8 +999,9 @@ Mix-in for Hash or Hash-like classes.
|
|
988
999
|
end
|
989
1000
|
mline = (options[:match_line] or seqs.match_line(mopt))
|
990
1001
|
|
991
|
-
aseqs = seqs.
|
992
|
-
|
1002
|
+
aseqs = Array.new(seqs.number_of_sequences).clear
|
1003
|
+
seqs.each_seq do |s|
|
1004
|
+
aseqs << s.to_s.gsub(seqs.gap_regexp, gchar)
|
993
1005
|
end
|
994
1006
|
case options[:case].to_s
|
995
1007
|
when /lower/i
|
@@ -1023,9 +1035,295 @@ Mix-in for Hash or Hash-like classes.
|
|
1023
1035
|
end
|
1024
1036
|
aln.join('')
|
1025
1037
|
end
|
1026
|
-
private :
|
1027
|
-
|
1038
|
+
private :__clustal_formatter
|
1039
|
+
|
1040
|
+
# Generates ClustalW-formatted text
|
1041
|
+
# seqs:: sequences (must be an alignment object)
|
1042
|
+
# names:: names of the sequences
|
1043
|
+
# options:: options
|
1044
|
+
def output_clustal(options = {})
  # The alignment formats itself: it is passed as the sequence source
  # together with its own sequence names and the caller's options.
  names = self.sequence_names
  __clustal_formatter(self, names, options)
end
|
1047
|
+
|
1048
|
+
# to_clustal is deprecated. Instead, please use output_clustal.
|
1049
|
+
#---
|
1050
|
+
#alias to_clustal output_clustal
|
1051
|
+
#+++
|
1052
|
+
def to_clustal(*arg)
  # Deprecated entry point: print the notice, then pass every argument
  # on to output_clustal and return its result.
  warn("to_clustal is deprecated. Please use output_clustal.")
  output_clustal(*arg)
end
|
1056
|
+
|
1057
|
+
# Generates fasta format text and returns a string.
|
1058
|
+
def output_fasta(options={})
  # Generates fasta format text and returns a string.
  #
  # options[:width]::           residues per line (default 70); a value
  #                             of 0 or less disables line wrapping
  # options[:avoid_same_name]:: when true, names are passed through
  #                             __clustal_avoid_same_name (length 30)
  #
  # Sequences are visited with each_seq, the same iterator every other
  # output_* method uses and the one whose order sequence_names is
  # documented to follow. Plain collect/each yields [key, value] pairs
  # on Hash-like alignments, which would pair names with the wrong data.
  width = (options[:width] or 70)
  if options[:avoid_same_name] then
    na = __clustal_avoid_same_name(self.sequence_names, 30)
  else
    # control characters would break the one-line ">name" header
    na = self.sequence_names.collect do |k|
      k.to_s.gsub(/[\r\n\x00]/, ' ')
    end
  end
  w_reg = (width and width > 0) ? Regexp.new(".{1,#{width}}") : nil
  entries = []
  self.each_seq do |s|
    residues = s.to_s
    if w_reg then
      # break the sequence after every +width+ characters
      residues = residues.gsub(w_reg, "\\0\n")
    else
      residues = residues + "\n"
    end
    entries << (">#{na.shift}\n" + residues)
  end
  entries.join('')
end
|
1079
|
+
|
1080
|
+
# generates phylip interleaved alignment format as a string
|
1081
|
+
def output_phylip(options = {})
  # __output_phylip_common returns the header array, one array of
  # formatted lines per sequence, and the number of line blocks.
  out, per_seq, nblocks = __output_phylip_common(options)
  # Interleave: each block takes the next line of every sequence and is
  # followed by a separator newline.
  nblocks.times do
    per_seq.each { |rows| out.push(rows.shift) }
    out.push("\n")
  end
  # no separator after the final block
  out.pop if out.last == "\n"
  out.join('')
end
|
1028
1090
|
|
1091
|
+
# generates Phylip3.2 (old) non-interleaved format as a string
|
1092
|
+
def output_phylipnon(options = {})
  # Only the header and the per-sequence line arrays are needed; the
  # block count returned as third element is ignored here.
  header, per_seq = __output_phylip_common(options)
  # Non-interleaved: all lines of one sequence, then the next sequence.
  header.first + per_seq.join('')
end
|
1096
|
+
|
1097
|
+
# common routine for interleaved/non-interleaved phylip format
|
1098
|
+
def __output_phylip_common(options = {})
  # Builds the pieces shared by output_phylip and output_phylipnon.
  # Returns [ header_lines, rows_per_sequence, number_of_blocks ].
  #
  # Options read here: :replace_space, :escape, :split,
  # :avoid_same_name, :width, :gap_char and :case.
  # :escape, :split and :avoid_same_name are on unless given as false/nil.
  total = self.alignment_length
  header = [ " #{self.number_of_sequences} #{total}\n" ]

  # --- names: strip control characters, then apply optional clean-ups
  labels = self.sequence_names.collect { |nm| nm.to_s.gsub(/[\r\n\x00]/, ' ') }
  labels.collect! { |nm| nm.gsub(/\s/, '_') } if options[:replace_space]
  labels.collect! { |nm| nm.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  labels.collect! { |nm| nm.split(/\s/)[0].to_s } if options.fetch(:split, true)
  labels = __clustal_avoid_same_name(labels, 10) if options.fetch(:avoid_same_name, true)

  label_width = 10
  # residues per line, rounded down to a multiple of 10
  block_width = (options[:width] or 60).div(10) * 10
  # every group of 10 residues gains one leading blank, hence 11 per group
  block_regexp = Regexp.new("(.{1,#{block_width.div(10) * 11}})")
  gap = (options[:gap_char] or '-')

  # --- sequences: normalise gap characters and letter case
  rows = Array.new(self.number_of_sequences).clear
  self.each_seq { |seq| rows << seq.to_s.gsub(self.gap_regexp, gap) }
  case options[:case].to_s
  when /lower/i then rows.each { |seq| seq.downcase! }
  when /upper/i then rows.each { |seq| seq.upcase! }
  end

  # --- layout: first line carries the name, later lines are indented
  indent = ' ' * label_width
  rows.collect! do |seq|
    label = labels.shift.to_s.ljust(label_width)[0, label_width]
    seq << (gap * (total - seq.length))     # pad to the alignment length
    seq.gsub!(/(.{1,10})/n, " \\1")         # groups of 10, blank-prefixed
    seq.gsub!(block_regexp, "\\1\n")        # break into output lines
    pieces = seq.split(/^/)
    first = label + pieces.shift
    pieces.collect { |piece| indent + piece }.unshift(first)
  end

  nblocks = (total + block_width - 1).div(block_width)
  [ header, rows, nblocks ]
end
|
1148
|
+
|
1149
|
+
# Generates Molphy alignment format text as a string
|
1150
|
+
def output_molphy(options = {})
  # Returns a string made of a "<count> <length>" header line followed,
  # for each sequence, by its name on one line and its residues wrapped
  # at options[:width] (default 60) characters.
  #
  # :escape, :split and :avoid_same_name are on unless given as false/nil;
  # :replace_space, :gap_char and :case are also honoured.
  total = self.alignment_length
  header = "#{self.number_of_sequences} #{total}\n"

  # --- names: strip control characters, then apply optional clean-ups
  labels = self.sequence_names.collect { |nm| nm.to_s.gsub(/[\r\n\x00]/, ' ') }
  labels.collect! { |nm| nm.gsub(/\s/, '_') } if options[:replace_space]
  labels.collect! { |nm| nm.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  labels.collect! { |nm| nm.split(/\s/)[0].to_s } if options.fetch(:split, true)
  labels = __clustal_avoid_same_name(labels, 30) if options.fetch(:avoid_same_name, true)

  line_width = (options[:width] or 60)
  line_regexp = Regexp.new("(.{1,#{line_width}})")
  gap = (options[:gap_char] or '-')

  # --- sequences: normalise gap characters and letter case
  # (the array is created with a size and emptied at once, as before)
  rows = Array.new(total).clear
  self.each_seq { |seq| rows << seq.to_s.gsub(self.gap_regexp, gap) }
  case options[:case].to_s
  when /lower/i then rows.each { |seq| seq.downcase! }
  when /upper/i then rows.each { |seq| seq.upcase! }
  end

  # --- layout: name line, then the padded and wrapped residues
  entries = rows.collect do |seq|
    seq << (gap * (total - seq.length))
    seq.gsub!(line_regexp, "\\1\n")
    labels.shift + "\n" + seq
  end
  header + entries.join('')
end
|
1190
|
+
|
1191
|
+
# Generates msf formatted text as a string
|
1192
|
+
def output_msf(options = {})
  # Generates msf formatted text as a string.
  #
  # Options read here:
  # :avoid_same_name, :replace_space, :escape, :split::
  #     name clean-ups, all on unless given as false/nil
  # :gap_char::      character for internal gaps (default '.')
  # :padding_char::  character for leading/trailing padding (default '~')
  # :case::          "lower" for lower case; anything else gives upper case
  # :type::          "protein"/"aa" or "na" to force the sequence type
  # :entry_id::      file name shown in the header line
  # :time::          Time used for the header date (default Time.now)
  len = self.seq_length

  # --- sequence names -------------------------------------------------
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(self.sequence_names)
  else
    sn = self.sequence_names.collect do |x|
      x.to_s.gsub(/[\r\n\x00]/, ' ')
    end
  end
  if !options.has_key?(:replace_space) or options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end

  seqwidth = 50
  namewidth = [31, sn.collect { |x| x.length }.max ].min
  sep = ' ' * 2

  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] or '.')
  pchar = (options[:padding_char] or '~')

  # --- sequences --------------------------------------------------------
  aseqs = Array.new(self.number_of_sequences).clear
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  aseqs.each do |s|
    # leading gaps become padding characters; trailing gaps are removed
    # and the sequence is re-filled to full length with padding
    s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
    s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
    s << (pchar * (len - s.length))
  end

  if /lower/i =~ options[:case].to_s
    aseqs.each { |s| s.downcase! }
  else
    # "upper" and the default are both upper case
    aseqs.each { |s| s.upcase! }
  end

  # --- sequence type ----------------------------------------------------
  case options[:type].to_s
  when /protein/i, /aa/i
    amino = true
  when /na/i
    amino = false
  else
    if seqclass == Bio::Sequence::AA then
      amino = true
    elsif seqclass == Bio::Sequence::NA then
      amino = false
    else
      # If we can't determine the type, we assume protein, unless every
      # sequence consists only of a/c/g/t plus gap and padding
      # characters. The pattern must cover the whole sequence; anchoring
      # a single character would only ever match one-letter sequences.
      filler = Regexp.escape(gchar) + Regexp.escape(pchar)
      na_only = /\A[acgt#{filler}]*\z/i
      amino = !aseqs.all? { |x| na_only =~ x }
    end
  end

  seq_type = (amino ? 'P' : 'N')

  fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf')
  dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M')

  # --- header -----------------------------------------------------------
  sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
  #sums = aseqs.collect { |s| 0 }
  sum = 0; sums.each { |x| sum += x }; sum %= 10000
  msf =
    [
     "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
     "\n",
     "\n",
     " #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n",
     "\n"
    ]

  sn.each do |snx|
    msf << ' Name: ' +
      sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
      " Len: #{len} Check: #{sums.shift} Weight: 1.00\n"
  end
  msf << "\n//\n"

  # --- alignment body ---------------------------------------------------
  # each sequence becomes an array of "name  residues\n" lines
  aseqs.collect! do |s|
    snx = sn.shift
    head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    a.collect { |x| head + x }
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  i = 1
  lines.times do
    msf << "\n"
    # ruler line giving the first and last column of this block
    n_l = i
    n_r = [ i + seqwidth - 1, len ].min
    if n_l != n_r then
      w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
      msf << (' ' * namewidth + sep + n_l.to_s +
              ' ' * w + n_r.to_s + "\n")
    else
      msf << (' ' * namewidth + sep + n_l.to_s + "\n")
    end
    aseqs.each { |a| msf << a.shift }
    i += seqwidth
  end
  msf << "\n"
  msf.join('')
end
|
1307
|
+
|
1308
|
+
end #module Output
|
1309
|
+
|
1310
|
+
module EnumerableExtension
  include Output

  # Returns number of sequences in this alignment.
  # Counted by walking each_seq once.
  def number_of_sequences
    count = 0
    each_seq { |_seq| count += 1 }
    count
  end

  # Returns an array of sequence names.
  # The order of the names must be the same as
  # the order of <tt>each_seq</tt>.
  # This default implementation numbers the sequences from 0.
  def sequence_names
    (0...number_of_sequences).to_a
  end
end #module EnumerableExtension
|
1029
1327
|
|
1030
1328
|
# Bio::Alignment::ArrayExtension is a set of useful methods for
|
1031
1329
|
# multiple sequence alignment.
|
@@ -1046,10 +1344,9 @@ Mix-in for Hash or Hash-like classes.
|
|
1046
1344
|
each(&block)
|
1047
1345
|
end
|
1048
1346
|
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
clustalw_formatter(self, (0...(self.size)).to_a, options)
|
1347
|
+
# Returns number of sequences in this alignment.
|
1348
|
+
def number_of_sequences
  # The receiver's size is the sequence count.
  size
end
|
1054
1351
|
end #module ArrayExtension
|
1055
1352
|
|
@@ -1077,8 +1374,9 @@ Mix-in for Hash or Hash-like classes.
|
|
1077
1374
|
# Yields a sequence.
|
1078
1375
|
#
|
1079
1376
|
# It works the same as Hash#each_value.
|
1080
|
-
def each_seq
|
1081
|
-
each_value(&block)
|
1377
|
+
def each_seq #:yields: seq
  # Walks the keys and fetches every value through self[] rather than
  # using each_value (presumably so that an overridden [] is honoured;
  # confirm against the including classes).
  each_key do |key|
    yield self[key]
  end
end
|
1083
1381
|
|
1084
1382
|
# Iterates over each sequence and each results running block
|
@@ -1140,15 +1438,16 @@ Mix-in for Hash or Hash-like classes.
|
|
1140
1438
|
self
|
1141
1439
|
end
|
1142
1440
|
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1441
|
+
# Returns number of sequences in this alignment.
|
1442
|
+
def number_of_sequences
  # The receiver's size (its number of entries) is the sequence count.
  size
end
|
1445
|
+
|
1446
|
+
# Returns an array of sequence names.
|
1447
|
+
# The order of the names must be the same as
|
1448
|
+
# the order of <tt>each_seq</tt>.
|
1449
|
+
def sequence_names
  # The keys of the receiver serve as the sequence names.
  keys
end
|
1153
1452
|
end #module HashExtension
|
1154
1453
|
|
@@ -1423,6 +1722,7 @@ Mix-in for Hash or Hash-like classes.
|
|
1423
1722
|
#(Hash&Array-like)
|
1424
1723
|
@seqs.size
|
1425
1724
|
end
|
1725
|
+
alias number_of_sequences size
|
1426
1726
|
|
1427
1727
|
# If the key exists, returns true. Otherwise, returns false.
|
1428
1728
|
# (Like Hash#has_key?)
|
@@ -1800,7 +2100,7 @@ Mix-in for Hash or Hash-like classes.
|
|
1800
2100
|
options = (arg.shift or {})
|
1801
2101
|
width = options[:width] unless width
|
1802
2102
|
if options[:avoid_same_name] then
|
1803
|
-
na =
|
2103
|
+
na = __clustal_avoid_same_name(self.keys, 30)
|
1804
2104
|
else
|
1805
2105
|
na = self.keys.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
|
1806
2106
|
end
|
@@ -1831,17 +2131,15 @@ Mix-in for Hash or Hash-like classes.
|
|
1831
2131
|
# Converts to fasta format and returns a string.
|
1832
2132
|
#
|
1833
2133
|
# The specification of the argument will be changed.
|
2134
|
+
#
|
2135
|
+
# Note: <tt>to_fasta</tt> is deprecated.
|
2136
|
+
# Please use <tt>output_fasta</tt> instead.
|
1834
2137
|
def to_fasta(*arg)
  #(original)
  # Deprecated entry point: print the notice, then join the pieces
  # produced by to_fasta_array into one string.
  warn("to_fasta is deprecated. Please use output_fasta.")
  pieces = to_fasta_array(*arg)
  pieces.join('')
end
|
1838
2142
|
|
1839
|
-
include ClustalWFormatter
|
1840
|
-
# Returns a string of Clustal W formatted text of the alignment.
|
1841
|
-
def to_clustal(options = {})
|
1842
|
-
clustalw_formatter(self, self.keys, options)
|
1843
|
-
end
|
1844
|
-
|
1845
2143
|
# The method name <tt>consensus</tt> will be obsoleted.
|
1846
2144
|
# Please use <tt>consensus_string</tt> instead.
|
1847
2145
|
alias consensus consensus_string
|
@@ -1900,6 +2198,318 @@ Mix-in for Hash or Hash-like classes.
|
|
1900
2198
|
def self.readfiles(*files)
  # Delegates to OriginalAlignment.readfiles with the same arguments
  # and returns whatever it returns.
  target = OriginalAlignment
  target.readfiles(*files)
end
|
2201
|
+
|
2202
|
+
#---
|
2203
|
+
# Service classes for multiple alignment applications
|
2204
|
+
#+++
|
2205
|
+
#---
|
2206
|
+
# Templates of alignment application factory
|
2207
|
+
#+++
|
2208
|
+
|
2209
|
+
# Namespace for templates for alignment application factory
|
2210
|
+
module FactoryTemplate
|
2211
|
+
|
2212
|
+
# Template class for alignment application factory.
|
2213
|
+
# The program acts:
|
2214
|
+
# input: stdin or file, format = fasta format
|
2215
|
+
# output: stdout (parser should be specified by DEFAULT_PARSER)
|
2216
|
+
class Simple

  # Creates a new alignment factory.
  # program:: program name (default: the class's DEFAULT_PROGRAM)
  # options:: array of command-line options
  def initialize(program = self.class::DEFAULT_PROGRAM, options = [])
    @program = program
    @options = options
    @command, @output, @report, @exit_status, @data_stdout = nil
  end

  # program name
  attr_accessor :program

  # options
  attr_accessor :options

  # Last command-line string. Returns nil or an array of String.
  # Note that filenames described in the command-line may already
  # be removed because these files may be temporary files.
  attr_reader :command

  # Last raw result of the program.
  # Return a string (or nil).
  attr_reader :output

  # Last result object performed by the factory.
  attr_reader :report

  # Last exit status
  attr_reader :exit_status

  # Last output to the stdout.
  attr_accessor :data_stdout

  # Clear the internal data and status, except program and options.
  def reset
    @command, @output, @report, @exit_status, @data_stdout = nil
  end

  # Executes the program.
  # If +seqs+ is not nil, performs alignment for seqs.
  # If +seqs+ is nil, simply executes the program.
  #
  # Compatibility note: When seqs is nil,
  # returns true if the program exits normally, and
  # returns false if the program exits abnormally.
  def query(seqs)
    return query_alignment(seqs) if seqs
    exec_local(@options)
    @exit_status.exitstatus == 0
  end

  # Performs alignment for seqs.
  # +seqs+ should be Bio::Alignment or Array of sequences or nil.
  # Anything that does not respond to output_fasta is first wrapped
  # in a Bio::Alignment.
  def query_alignment(seqs)
    seqs = Bio::Alignment.new(seqs) unless seqs.respond_to?(:output_fasta)
    query_string(seqs.output_fasta(:width => 70))
  end

  # alias of query_alignment.
  #
  # Compatibility Note: query_align will be renamed to query_alignment.
  def query_align(seqs)
    #warn 'query_align is renamed to query_alignment.'
    query_alignment(seqs)
  end

  # Performs alignment for +str+.
  # The +str+ should be a string that can be recognized by the program.
  # Returns the report object.
  def query_string(str)
    _query_string(str, @options)
    @report
  end

  # Performs alignment of sequences in the file named +filename_in+.
  # Returns the report object.
  def query_by_filename(filename_in)
    _query_local(filename_in, @options)
    @report
  end

  private
  # Executes a program in the local machine.
  # Records the command line, the program's stdout and its exit status.
  def exec_local(opt, data_stdin = nil)
    @exit_status = nil
    @command = [ @program, *opt ]
    #STDERR.print "DEBUG: ", @command.join(" "), "\n"
    @data_stdout = Bio::Command.query_command(@command, data_stdin)
    @exit_status = $?
  end

  # Prepares a temporary file; when +str+ is given it is written to
  # the file. The file is closed (but not removed) before it is returned.
  def _prepare_tempfile(str = nil)
    basename = str ? 'alignment_i' : 'alignment_o'
    tempfile = Tempfile.open(basename)
    tempfile.print str if str
    tempfile.close(false)
    tempfile
  end

  # generates options specifying input/output filename.
  # nil for filename means stdin or stdout.
  # +options+ must not already specify filenames.
  # returns an array of string.
  def _generate_options(infile, outfile, options)
    input_part  = infile  ? _option_input_file(infile)   : _option_input_stdin
    output_part = outfile ? _option_output_file(outfile) : _option_output_stdout
    options + input_part + output_part
  end

  # generates options specifying input filename.
  # returns an array of string
  def _option_input_file(fn)
    [ fn ]
  end

  # generates options specifying output filename.
  # This template always writes to stdout, so it raises.
  def _option_output_file(fn)
    raise 'can not specify output file: always stdout'
  end

  # generates options specifying that input is taken from stdin.
  # returns an array of string
  def _option_input_stdin
    []
  end

  # generates options specifying output to stdout.
  # returns an array of string
  def _option_output_stdout
    []
  end
end #class Simple
|
2360
|
+
|
2361
|
+
# mix-in module
|
2362
|
+
module WrapInputStdin
  # Performs alignment for +str+.
  # The +str+ should be a string that can be recognized by the program.
  # No input filename is passed; +str+ is handed over as stdin data.
  def _query_string(str, opt)
    _query_local(nil, opt, str)
  end
  private :_query_string
end #module WrapInputStdin
|
2370
|
+
|
2371
|
+
# mix-in module
|
2372
|
+
module WrapInputTempfile
  # Performs alignment for +str+.
  # The +str+ should be a string that can be recognized by the program.
  # +str+ is written to a temporary file whose path is passed as the
  # input filename; the file is closed and removed afterwards, even
  # when the query raises.
  def _query_string(str, opt)
    tf_in = _prepare_tempfile(str)
    _query_local(tf_in.path, opt, nil)
  ensure
    tf_in.close(true) if tf_in
  end
  private :_query_string
end #module WrapInputTempfile
|
2386
|
+
|
2387
|
+
# mix-in module
|
2388
|
+
module WrapOutputStdout
  # Performs alignment by specified filenames.
  # The program's stdout becomes @output, which is parsed by the
  # class's DEFAULT_PARSER; the resulting report is stored and returned.
  def _query_local(fn_in, opt, data_stdin = nil)
    exec_local(_generate_options(fn_in, nil, opt), data_stdin)
    @output = @data_stdout
    @report = self.class::DEFAULT_PARSER.new(@output)
  end
  private :_query_local
end #module WrapOutputStdout
|
2399
|
+
|
2400
|
+
# mix-in module
|
2401
|
+
module WrapOutputTempfile
  # Performs alignment.
  # The program is told to write into a temporary file; that file is
  # read back into @output and then closed and removed, even when the
  # run raises. @output is parsed by the class's DEFAULT_PARSER and the
  # resulting report is stored and returned.
  def _query_local(fn_in, opt, data_stdin = nil)
    tf_out = nil
    begin
      tf_out = _prepare_tempfile()
      exec_local(_generate_options(fn_in, tf_out.path, opt), data_stdin)
      tf_out.open
      @output = tf_out.read
    ensure
      tf_out.close(true) if tf_out
    end
    @report = self.class::DEFAULT_PARSER.new(@output)
  end
  private :_query_local
end #module WrapOutputTempfile
|
2418
|
+
|
2419
|
+
# Template class for alignment application factory.
|
2420
|
+
# The program needs:
|
2421
|
+
# input: file (cannot accept stdin), format = fasta format
|
2422
|
+
# output: stdout (parser should be specified by DEFAULT_PARSER)
|
2423
|
+
class FileInStdoutOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputTempfile
  include Bio::Alignment::FactoryTemplate::WrapOutputStdout

  # Called when no input filename is given.
  # This template cannot read from stdin, so it always raises.
  def _option_input_stdin
    raise RuntimeError, 'input is always a file'
  end
  private :_option_input_stdin
end #class FileInStdoutOut
|
2434
|
+
|
2435
|
+
# Template class for alignment application factory.
|
2436
|
+
# The program needs:
|
2437
|
+
# input: stdin or file, format = fasta format
|
2438
|
+
# output: file (parser should be specified by DEFAULT_PARSER)
|
2439
|
+
class StdinInFileOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputStdin
  include Bio::Alignment::FactoryTemplate::WrapOutputTempfile

  # Called when no output filename is given.
  # This template always writes to a file, so it always raises.
  def _option_output_stdout
    raise RuntimeError, 'output is always a file'
  end
  private :_option_output_stdout
end #class StdinInFileOut
|
2450
|
+
|
2451
|
+
# Template class for alignment application factory.
|
2452
|
+
# The program needs:
|
2453
|
+
# input: file (cannot accept stdin), format = fasta format
|
2454
|
+
# output: file (parser should be specified by DEFAULT_PARSER)
|
2455
|
+
class FileInFileOut < Simple
  include Bio::Alignment::FactoryTemplate::WrapInputTempfile
  include Bio::Alignment::FactoryTemplate::WrapOutputTempfile

  # Called when no input filename is given.
  # This template cannot read from stdin, so it always raises.
  def _option_input_stdin
    raise RuntimeError, 'input is always a file'
  end

  # Called when no output filename is given.
  # This template always writes to a file, so it always raises.
  def _option_output_stdout
    raise RuntimeError, 'output is always a file'
  end

  private :_option_input_stdin, :_option_output_stdout
end #class FileInFileOut
|
2472
|
+
|
2473
|
+
# Template class for alignment application factory.
|
2474
|
+
# The program needs:
|
2475
|
+
# input: file (cannot accept stdin), format = fasta format
|
2476
|
+
# output: file (parser should be specified by DEFAULT_PARSER)
|
2477
|
+
# Tree (*.dnd) output is also supported.
|
2478
|
+
class FileInFileOutWithTree < FileInFileOut

  # alignment guide tree generated by the program (*.dnd file)
  attr_reader :output_dnd

  # Clears the stored guide tree in addition to the state cleared by
  # the superclass.
  def reset
    @output_dnd = nil
    super
  end

  private
  # Performs alignment.
  # A temporary file is prepared for the guide tree and the options
  # naming it are appended before the superclass runs the program.
  # The tree file is then read into @output_dnd and removed, even when
  # the run raises. Returns what the superclass returns.
  def _query_local(fn_in, opt, data_stdin = nil)
    begin
      tf_dnd = _prepare_tempfile()
      opt = opt + _option_output_dndfile(tf_dnd.path)
      ret = super(fn_in, opt, data_stdin)
      tf_dnd.open
      @output_dnd = tf_dnd.read
    ensure
      tf_dnd.close(true) if tf_dnd
    end
    ret
  end

  # generates options specifying output tree file (*.dnd).
  # returns an array of string
  #
  # Must be overridden by subclasses. It takes the tree filename
  # because _query_local calls it with one argument; without the
  # parameter the call would fail with ArgumentError before the
  # intended NotImplementedError could be raised.
  def _option_output_dndfile(fn)
    raise NotImplementedError
  end
end #class FileInFileOutWithTree
|
2509
|
+
|
2510
|
+
end #module FactoryTemplate
|
2511
|
+
|
2512
|
+
|
1903
2513
|
end #module Alignment
|
1904
2514
|
|
1905
2515
|
end #module Bio
|