bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/paml/baseml.rb - Wrapper for running PAML program baseml
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# License:: The Ruby License
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# This file contains Bio::PAML::Baseml, a wrapper class running baseml.
|
|
12
|
+
#
|
|
13
|
+
# == References
|
|
14
|
+
#
|
|
15
|
+
# * http://abacus.gene.ucl.ac.uk/software/paml.html
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
require 'bio/appl/paml/common'
|
|
19
|
+
|
|
20
|
+
module Bio::PAML
|
|
21
|
+
|
|
22
|
+
# == Description
|
|
23
|
+
#
|
|
24
|
+
# Bio::PAML::Baseml is a wrapper for running PAML baseml program.
|
|
25
|
+
#
|
|
26
|
+
# Because most of the methods in this class are inherited from
|
|
27
|
+
# Bio::PAML::Common, see documents of Bio::PAML::Common for details.
|
|
28
|
+
#
|
|
29
|
+
# == Examples
|
|
30
|
+
#
|
|
31
|
+
# Example 1:
|
|
32
|
+
#
|
|
33
|
+
# require 'bio'
|
|
34
|
+
# # Reads multi-fasta formatted file and gets a Bio::Alignment object.
|
|
35
|
+
# alignment = Bio::FlatFile.open(Bio::Alignment::MultiFastaFormat,
|
|
36
|
+
# 'example.fst').alignment
|
|
37
|
+
# # Reads newick tree from a file
|
|
38
|
+
# tree = Bio::FlatFile.open(Bio::Newick, 'example.tree').tree
|
|
39
|
+
# # Creates a Baseml object
|
|
40
|
+
# baseml = Bio::PAML::Baseml.new
|
|
41
|
+
# # Sets parameters
|
|
42
|
+
# baseml.parameters[:runmode] = 0
|
|
43
|
+
# baseml.parameters[:RateAncestor] = 1
|
|
44
|
+
# # You can also set many parameters at a time.
|
|
45
|
+
# baseml.parameters.update({ :alpha => 0.5, :fix_alpha => 0 })
|
|
46
|
+
# # Executes baseml with the alignment and the tree
|
|
47
|
+
# report = baseml.query(alignment, tree)
|
|
48
|
+
#
|
|
49
|
+
class Baseml < Common
|
|
50
|
+
|
|
51
|
+
autoload :Report, 'bio/appl/paml/baseml/report'
|
|
52
|
+
|
|
53
|
+
# Default program name
|
|
54
|
+
DEFAULT_PROGRAM = 'baseml'.freeze
|
|
55
|
+
|
|
56
|
+
# Default parameters when running baseml.
|
|
57
|
+
#
|
|
58
|
+
# The parameters whose values are different from the baseml default
|
|
59
|
+
# value (described in pamlDOC.pdf) in PAML 4.1 are:
|
|
60
|
+
# seqfile, outfile, treefile, ndata, noisy, verbose
|
|
61
|
+
#
|
|
62
|
+
DEFAULT_PARAMETERS = {
|
|
63
|
+
# Essential arguments
|
|
64
|
+
:seqfile => nil,
|
|
65
|
+
:outfile => nil,
|
|
66
|
+
# Optional arguments
|
|
67
|
+
:treefile => nil,
|
|
68
|
+
:noisy => 0,
|
|
69
|
+
:verbose => 1,
|
|
70
|
+
:runmode => 0,
|
|
71
|
+
:model => 5,
|
|
72
|
+
:Mgene => 0,
|
|
73
|
+
:ndata => 1,
|
|
74
|
+
:clock => 0,
|
|
75
|
+
:fix_kappa => 0,
|
|
76
|
+
:kappa => 2.5,
|
|
77
|
+
:fix_alpha => 1,
|
|
78
|
+
:alpha => 0.0,
|
|
79
|
+
:Malpha => 0,
|
|
80
|
+
:ncatG => 5,
|
|
81
|
+
:fix_rho => 1,
|
|
82
|
+
:rho => 0.0,
|
|
83
|
+
:nparK => 0,
|
|
84
|
+
:nhomo => 0,
|
|
85
|
+
:getSE => 0,
|
|
86
|
+
:RateAncestor => 0,
|
|
87
|
+
:Small_Diff => 1e-6,
|
|
88
|
+
:cleandata => 1,
|
|
89
|
+
:fix_blength => 0,
|
|
90
|
+
:method => 0
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
end #class Baseml
|
|
94
|
+
end #module Bio::PAML
|
|
95
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/paml/baseml/report.rb - parser class for PAML program baseml
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# License:: The Ruby License
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# This file contains Bio::PAML::Baseml::Report, a parser class for a result
|
|
12
|
+
# of baseml.
|
|
13
|
+
#
|
|
14
|
+
# == References
|
|
15
|
+
#
|
|
16
|
+
# * http://abacus.gene.ucl.ac.uk/software/paml.html
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
require 'bio/appl/paml/baseml'
|
|
20
|
+
|
|
21
|
+
module Bio::PAML
|
|
22
|
+
class Baseml
|
|
23
|
+
|
|
24
|
+
# UNDER CONSTRUCTION.
|
|
25
|
+
#
|
|
26
|
+
# Bio::PAML::Baseml::Report is a parser class for a baseml result.
|
|
27
|
+
#
|
|
28
|
+
class Report < Bio::PAML::Common::Report
|
|
29
|
+
end #class Report
|
|
30
|
+
|
|
31
|
+
end #class Baseml
|
|
32
|
+
end #module Bio::PAML
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/paml/codeml.rb - Wrapper for running PAML program codeml
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008
|
|
5
|
+
# Michael D. Barton <mail@michaelbarton.me.uk>,
|
|
6
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
7
|
+
#
|
|
8
|
+
# License:: The Ruby License
|
|
9
|
+
#
|
|
10
|
+
# == Description
|
|
11
|
+
#
|
|
12
|
+
# This file contains a wrapper for running the CODEML tool for estimating evolutionary rate
|
|
13
|
+
#
|
|
14
|
+
# == References
|
|
15
|
+
#
|
|
16
|
+
# * http://abacus.gene.ucl.ac.uk/software/paml.html
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
require 'bio/appl/paml/common'
|
|
20
|
+
|
|
21
|
+
module Bio
|
|
22
|
+
module PAML
|
|
23
|
+
|
|
24
|
+
# == Description
|
|
25
|
+
#
|
|
26
|
+
# Bio::PAML::Codeml is a wrapper for estimating evolutionary rate using the CODEML
|
|
27
|
+
# tool. The class provides methods for generating the necessary configuration
|
|
28
|
+
# file, and running codeml with the specified binary. Codeml output is
|
|
29
|
+
# returned when codeml is run. Bio::PAML::Codeml::Report and Bio::PAML::Codeml::Rates
|
|
30
|
+
# provide simple classes for parsing and accessing the Codeml report and
|
|
31
|
+
# rates files respectively.
|
|
32
|
+
#
|
|
33
|
+
# == Examples
|
|
34
|
+
#
|
|
35
|
+
# Example 1:
|
|
36
|
+
#
|
|
37
|
+
# require 'bio'
|
|
38
|
+
# # Reads multi-fasta formatted file and gets a Bio::Alignment object.
|
|
39
|
+
# alignment = Bio::FlatFile.open(Bio::Alignment::MultiFastaFormat,
|
|
40
|
+
# 'example.fst').alignment
|
|
41
|
+
# # Reads newick tree from a file
|
|
42
|
+
# tree = Bio::FlatFile.open(Bio::Newick, 'example.tree').tree
|
|
43
|
+
# # Creates a Codeml object
|
|
44
|
+
# codeml = Bio::PAML::Codeml.new
|
|
45
|
+
# # Sets parameters
|
|
46
|
+
# codeml.parameters[:runmode] = 0
|
|
47
|
+
# codeml.parameters[:RateAncestor] = 1
|
|
48
|
+
# # You can also set many parameters at a time.
|
|
49
|
+
# codeml.parameters.update({ :alpha => 0.5, :fix_alpha => 0 })
|
|
50
|
+
# # Executes codeml with the alignment and the tree
|
|
51
|
+
# report = codeml.query(alignment, tree)
|
|
52
|
+
#
|
|
53
|
+
# Example 2 (Obsolete usage):
|
|
54
|
+
#
|
|
55
|
+
# # Create a control file, setting some Codeml options
|
|
56
|
+
# # Default parameters are used otherwise, see RDoc for defaults
|
|
57
|
+
# # The names of the parameters correspond to those specified
|
|
58
|
+
# # in the Codeml documentation
|
|
59
|
+
# control_file = Tempfile.new('codeml_ctl')
|
|
60
|
+
# control_file.close(false)
|
|
61
|
+
# # Prepare output file as a temporary file
|
|
62
|
+
# output_file = Tempfile.new('codeml_test')
|
|
63
|
+
# output_file.close(false)
|
|
64
|
+
# Bio::PAML::Codeml.create_control_file({
|
|
65
|
+
# :model => 1,
|
|
66
|
+
# :fix_kappa => 1,
|
|
67
|
+
# :aaRatefile => TEST_DATA + '/wag.dat',
|
|
68
|
+
# :seqfile => TEST_DATA + '/abglobin.aa',
|
|
69
|
+
# :treefile => TEST_DATA + '/abglobin.trees',
|
|
70
|
+
# :outfile => output_file.path,
|
|
71
|
+
# }, control_file.path)
|
|
72
|
+
#
|
|
73
|
+
# # Create an instance of Codeml specifying where the codeml binary is
|
|
74
|
+
# codeml = Bio::PAML::Codeml.new('/path/to/codeml')
|
|
75
|
+
#
|
|
76
|
+
# # Run codeml using a control file
|
|
77
|
+
# # Returns the command line output
|
|
78
|
+
# codeml_output = codeml.run(control_file)
|
|
79
|
+
#
|
|
80
|
+
class Codeml < Common
|
|
81
|
+
|
|
82
|
+
autoload :Report, 'bio/appl/paml/codeml/report'
|
|
83
|
+
autoload :Rates, 'bio/appl/paml/codeml/rates'
|
|
84
|
+
|
|
85
|
+
# Default program name
|
|
86
|
+
DEFAULT_PROGRAM = 'codeml'.freeze
|
|
87
|
+
|
|
88
|
+
# Default parameters when running codeml.
|
|
89
|
+
#
|
|
90
|
+
# The parameters whose values are different from the codeml default
|
|
91
|
+
# value (described in pamlDOC.pdf) in PAML 4.1 are:
|
|
92
|
+
# seqfile, outfile, treefile, ndata, noisy, verbose, cleandata
|
|
93
|
+
#
|
|
94
|
+
DEFAULT_PARAMETERS = {
|
|
95
|
+
# Essential arguments
|
|
96
|
+
:seqfile => nil,
|
|
97
|
+
:outfile => nil,
|
|
98
|
+
# Optional arguments
|
|
99
|
+
:treefile => nil,
|
|
100
|
+
:noisy => 0,
|
|
101
|
+
:verbose => 1,
|
|
102
|
+
:runmode => 0,
|
|
103
|
+
:seqtype => 2,
|
|
104
|
+
:CodonFreq => 2,
|
|
105
|
+
:ndata => 1,
|
|
106
|
+
:clock => 0,
|
|
107
|
+
:aaDist => 0,
|
|
108
|
+
:aaRatefile => 'wag.dat',
|
|
109
|
+
:model => 2,
|
|
110
|
+
:NSsites => 0,
|
|
111
|
+
:icode => 0,
|
|
112
|
+
:Mgene => 0,
|
|
113
|
+
:fix_kappa => 0,
|
|
114
|
+
:kappa => 2,
|
|
115
|
+
:fix_omega => 0,
|
|
116
|
+
:omega => 0.4,
|
|
117
|
+
:fix_alpha => 0,
|
|
118
|
+
:alpha => 0.0,
|
|
119
|
+
:Malpha => 0,
|
|
120
|
+
:ncatG => 3,
|
|
121
|
+
:fix_rho => 1,
|
|
122
|
+
:rho => 0.0,
|
|
123
|
+
:getSE => 0,
|
|
124
|
+
:RateAncestor => 0,
|
|
125
|
+
:Small_Diff => 0.5e-6,
|
|
126
|
+
:cleandata => 1,
|
|
127
|
+
:fix_blength => 0,
|
|
128
|
+
:method => 0
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
# OBSOLETE. This method should not be used.
|
|
132
|
+
# Instead, use parameters.
|
|
133
|
+
def options
|
|
134
|
+
warn 'The method Codeml#options will be changed to be used for command line arguments in the future. Instead, use Codeml#parameters.'
|
|
135
|
+
parameters
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# OBSOLETE. This method should not be used.
|
|
139
|
+
# Instead, use parameters=(hash).
|
|
140
|
+
def options=(hash)
|
|
141
|
+
warn 'The method Codeml#options=() will be changed to be used for command line arguments in the future. Instead, use Codeml#parameters=().'
|
|
142
|
+
self.parameters=(hash)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Obsolete. This method will be removed in the future.
|
|
146
|
+
# Helper method for creating a codeml control file.
|
|
147
|
+
# Note that default parameters are automatically merged.
|
|
148
|
+
def self.create_control_file(parameters, filename)
|
|
149
|
+
parameters = DEFAULT_PARAMETERS.merge(parameters)
|
|
150
|
+
File.open(filename, 'w') do |file|
|
|
151
|
+
parameters.each do |key, value|
|
|
152
|
+
file.puts "#{key.to_s} = #{value.to_s}" if value
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
filename
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# OBSOLETE. This method will soon be removed.
|
|
159
|
+
# Instead, use create_control_file(parameters, filename).
|
|
160
|
+
def self.create_config_file(parameters, filename)
|
|
161
|
+
warn "The method Codeml.create_config_file(parameters, filename) will soon be removed. Instead, use Codeml.create_control_file(filename, parameters)."
|
|
162
|
+
create_control_file(parameters, filename)
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Runs the program on the internal parameters with the specified
|
|
167
|
+
# sequence alignment and tree.
|
|
168
|
+
#
|
|
169
|
+
# Note that parameters[:seqfile] and parameters[:outfile]
|
|
170
|
+
# are always modified, and parameters[:treefile] and
|
|
171
|
+
# parameters[:aaRatefile] are modified when tree and aarate are
|
|
172
|
+
# specified respectively.
|
|
173
|
+
#
|
|
174
|
+
# For other important information, see the document of
|
|
175
|
+
# Bio::PAML::Common#query.
|
|
176
|
+
#
|
|
177
|
+
# ---
|
|
178
|
+
# *Arguments*:
|
|
179
|
+
# * (required) _alignment_: Bio::Alignment object or similar object
|
|
180
|
+
# * (optional) _tree_: Bio::Tree object
|
|
181
|
+
# * (optional) _aarate_: String or nil
|
|
182
|
+
# *Returns*:: Report object
|
|
183
|
+
def query(alignment, tree = nil, aarate = nil)
|
|
184
|
+
begin
|
|
185
|
+
aaratefile = prepare_aaratefile(aarate)
|
|
186
|
+
ret = super(alignment, tree)
|
|
187
|
+
ensure
|
|
188
|
+
finalize_aaratefile(aaratefile)
|
|
189
|
+
end
|
|
190
|
+
ret
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
# Runs the program on the internal parameters with the specified
|
|
194
|
+
# sequence alignment data string and tree data string.
|
|
195
|
+
#
|
|
196
|
+
# Note that parameters[:outfile] is always modified, and
|
|
197
|
+
# parameters[:seqfile], parameters[:treefile], and
|
|
198
|
+
# parameters[:aaRatefile] are modified when
|
|
199
|
+
# alignment, tree, and aarate are specified respectively.
|
|
200
|
+
#
|
|
201
|
+
# It raises RuntimeError if seqfile is not specified in the argument
|
|
202
|
+
# or in the parameter.
|
|
203
|
+
#
|
|
204
|
+
# For other important information, see the document of query method.
|
|
205
|
+
#
|
|
206
|
+
# ---
|
|
207
|
+
# *Arguments*:
|
|
208
|
+
# * (optional) _alignment_: String
|
|
209
|
+
# * (optional) _tree_: String or nil
|
|
210
|
+
# * (optional) _aarate_: String or nil
|
|
211
|
+
# *Returns*:: contents of output file (String)
|
|
212
|
+
def query_by_string(alignment = nil, tree = nil, aarate = nil)
|
|
213
|
+
begin
|
|
214
|
+
aaratefile = prepare_aaratefile(aarate)
|
|
215
|
+
ret = super(alignment, tree)
|
|
216
|
+
ensure
|
|
217
|
+
finalize_aaratefile(aaratefile)
|
|
218
|
+
end
|
|
219
|
+
ret
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
private
|
|
223
|
+
|
|
224
|
+
# (private) prepares temporary file for aaRatefile if needed
|
|
225
|
+
def prepare_aaratefile(aarate)
|
|
226
|
+
if aarate then
|
|
227
|
+
aaratefile = Tempfile.new('codeml_aarate')
|
|
228
|
+
aaratefile.print aarate
|
|
229
|
+
aaratefile.close(false)
|
|
230
|
+
self.parameters[:aaRatefile] = aaratefile.path
|
|
231
|
+
end
|
|
232
|
+
aaratefile
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
# (private) removes temporary file for aaRatefile if needed
|
|
236
|
+
def finalize_aaratefile(aaratefile)
|
|
237
|
+
aaratefile.close(true) if aaratefile
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
end # End class Codeml
|
|
241
|
+
end # End module PAML
|
|
242
|
+
end # End module Bio
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/paml/codeml/rates.rb - Codeml rates report file parser
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008 Michael D. Barton <mail@michaelbarton.me.uk>
|
|
5
|
+
#
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# == Description
|
|
9
|
+
#
|
|
10
|
+
# This file contains a class that implements a simple interface to the Codeml rates estimation file.
|
|
11
|
+
#
|
|
12
|
+
# == References
|
|
13
|
+
#
|
|
14
|
+
# * http://abacus.gene.ucl.ac.uk/software/paml.html
|
|
15
|
+
#
|
|
16
|
+
|
|
17
|
+
require 'delegate'
|
|
18
|
+
require 'bio/appl/paml/codeml'
|
|
19
|
+
|
|
20
|
+
module Bio::PAML
|
|
21
|
+
class Codeml
|
|
22
|
+
|
|
23
|
+
# == Description
|
|
24
|
+
#
|
|
25
|
+
# A simple class for parsing the codeml rates file.
|
|
26
|
+
#
|
|
27
|
+
# WARNING: The order of the parsed data should be correct, however will
|
|
28
|
+
# not necessarily correspond to the position in the alignment. For instance
|
|
29
|
+
# codeml ignores columns that contain gaps, and therefore there will not
|
|
30
|
+
# be any estimated rate data.
|
|
31
|
+
#
|
|
32
|
+
# == Usage
|
|
33
|
+
#
|
|
34
|
+
# site_rates = Bio::PAML::Codeml::Rates.new(File.open(@tmp_dir + "/rates").read)
|
|
35
|
+
# site_rates.first[:freq] # => Number of times that column appears
|
|
36
|
+
# site_rates[5][:rate] # => Estimated rate of evolution
|
|
37
|
+
# site_rates.last[:data] # => The content of the column, as a string
|
|
38
|
+
#
|
|
39
|
+
# # This class delegates to an array, so will respond to all array methods
|
|
40
|
+
# site_rates.max {|x,y| x[:rate] <=> y[:rate] } # => Fastest evolving column
|
|
41
|
+
# site_rates.detect {|x| x[:freq] > 1 } # => First column appearing more than once
|
|
42
|
+
class Rates < DelegateClass(Array)
|
|
43
|
+
|
|
44
|
+
def initialize(rates)
|
|
45
|
+
super(parse_rates(rates))
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
def parse_rates(text)
|
|
51
|
+
re = /\s+(\d+)\s+(\d+)\s+([A-Z\*]+)\s+(\d+\.\d+)\s+(\d)/
|
|
52
|
+
array = Array.new
|
|
53
|
+
text.each_line do |line|
|
|
54
|
+
if re =~ line
|
|
55
|
+
match = Regexp.last_match
|
|
56
|
+
array[match[1].to_i] = {
|
|
57
|
+
:freq => match[2].to_i,
|
|
58
|
+
:data => match[3],
|
|
59
|
+
:rate => match[4].to_f }
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
array.compact
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/paml/codeml/report.rb - Codeml report parser
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008 Michael D. Barton <mail@michaelbarton.me.uk>
|
|
5
|
+
#
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# == Description
|
|
9
|
+
#
|
|
10
|
+
# This file contains a class that implements a simple interface to the Codeml output file.
|
|
11
|
+
#
|
|
12
|
+
# == References
|
|
13
|
+
#
|
|
14
|
+
# * http://abacus.gene.ucl.ac.uk/software/paml.html
|
|
15
|
+
#
|
|
16
|
+
|
|
17
|
+
require 'bio/appl/paml/codeml'
|
|
18
|
+
|
|
19
|
+
module Bio::PAML
|
|
20
|
+
class Codeml
|
|
21
|
+
|
|
22
|
+
# == Description
|
|
23
|
+
#
|
|
24
|
+
# A simple class for parsing codeml output.
|
|
25
|
+
#
|
|
26
|
+
# WARNING: This data is parsed using a regex from the output file, and
|
|
27
|
+
# so will take the first result found. If using multiple trees, your
|
|
28
|
+
# mileage may vary. See the source for the regular expressions.
|
|
29
|
+
#
|
|
30
|
+
# require 'bio'
|
|
31
|
+
#
|
|
32
|
+
# report = Bio::PAML::Codeml::Report.new(File.open(codeml_output_file).read)
|
|
33
|
+
# report.alpha # => Rate of gene evolution as defined by alpha
|
|
34
|
+
# report.tree_length # => Estimated phylogenetic tree length
|
|
35
|
+
class Report < Bio::PAML::Common::Report
|
|
36
|
+
|
|
37
|
+
attr_reader :tree_log_likelihood, :tree_length, :alpha, :tree
|
|
38
|
+
|
|
39
|
+
def initialize(codeml_report)
|
|
40
|
+
@tree_log_likelihood = pull_tree_log_likelihood(codeml_report)
|
|
41
|
+
@tree_length = pull_tree_length(codeml_report)
|
|
42
|
+
@alpha = pull_alpha(codeml_report)
|
|
43
|
+
@tree = pull_tree(codeml_report)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
def pull_tree_log_likelihood(text)
|
|
49
|
+
text[/lnL\(.+\):\s+(-?\d+(\.\d+)?)/,1].to_f
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def pull_tree_length(text)
|
|
54
|
+
text[/tree length\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def pull_alpha(text)
|
|
58
|
+
text[/alpha .+ =\s+(-?\d+(\.\d+)?)/,1].to_f
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def pull_tree(text)
|
|
62
|
+
text[/([^\n]+)\n\nDetailed/m,1]
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
end # End Report
|
|
66
|
+
end # End Codeml
|
|
67
|
+
end # End Bio::PAML
|