bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,95 @@
1
+ #
2
+ # = bio/appl/paml/baseml.rb - Wrapper for running PAML program baseml
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains Bio::PAML::Baseml, a wrapper class running baseml.
12
+ #
13
+ # == References
14
+ #
15
+ # * http://abacus.gene.ucl.ac.uk/software/paml.html
16
+ #
17
+
18
+ require 'bio/appl/paml/common'
19
+
20
+ module Bio::PAML
21
+
22
+ # == Description
23
+ #
24
+ # Bio::PAML::Baseml is a wrapper for running PAML baseml program.
25
+ #
26
+ # Because most of the methods in this class are inherited from
27
+ # Bio::PAML::Common, see documents of Bio::PAML::Common for details.
28
+ #
29
+ # == Examples
30
+ #
31
+ # Example 1:
32
+ #
33
+ # require 'bio'
34
+ # # Reads multi-fasta formatted file and gets a Bio::Alignment object.
35
+ # alignment = Bio::FlatFile.open(Bio::Alignment::MultiFastaFormat,
36
+ # 'example.fst').alignment
37
+ # # Reads newick tree from a file
38
+ # tree = Bio::FlatFile.open(Bio::Newick, 'example.tree').tree
39
+ # # Creates a Baseml object
40
+ # baseml = Bio::PAML::Baseml.new
41
+ # # Sets parameters
42
+ # baseml.parameters[:runmode] = 0
43
+ # baseml.parameters[:RateAncestor] = 1
44
+ # # You can also set many parameters at a time.
45
+ # baseml.parameters.update({ :alpha => 0.5, :fix_alpha => 0 })
46
+ # # Executes baseml with the alignment and the tree
47
+ # report = baseml.query(alignment, tree)
48
+ #
49
+ class Baseml < Common
50
+
51
+ autoload :Report, 'bio/appl/paml/baseml/report'
52
+
53
+ # Default program name
54
+ DEFAULT_PROGRAM = 'baseml'.freeze
55
+
56
+ # Default parameters when running baseml.
57
+ #
58
+ # The parameters whose values are different from the baseml default
59
+ # value (described in pamlDOC.pdf) in PAML 4.1 are:
60
+ # seqfile, outfile, treefile, ndata, noisy, verbose
61
+ #
62
+ DEFAULT_PARAMETERS = {
63
+ # Essential arguments
64
+ :seqfile => nil,
65
+ :outfile => nil,
66
+ # Optional arguments
67
+ :treefile => nil,
68
+ :noisy => 0,
69
+ :verbose => 1,
70
+ :runmode => 0,
71
+ :model => 5,
72
+ :Mgene => 0,
73
+ :ndata => 1,
74
+ :clock => 0,
75
+ :fix_kappa => 0,
76
+ :kappa => 2.5,
77
+ :fix_alpha => 1,
78
+ :alpha => 0.0,
79
+ :Malpha => 0,
80
+ :ncatG => 5,
81
+ :fix_rho => 1,
82
+ :rho => 0.0,
83
+ :nparK => 0,
84
+ :nhomo => 0,
85
+ :getSE => 0,
86
+ :RateAncestor => 0,
87
+ :Small_Diff => 1e-6,
88
+ :cleandata => 1,
89
+ :fix_blength => 0,
90
+ :method => 0
91
+ }
92
+
93
+ end #class Baseml
94
+ end #module Bio::PAML
95
+
@@ -0,0 +1,32 @@
1
+ #
2
+ # = bio/appl/paml/baseml/report.rb - parser class for PAML program baseml
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains Bio::PAML::Baseml::Report, a parser class for a result
12
+ # of baseml.
13
+ #
14
+ # == References
15
+ #
16
+ # * http://abacus.gene.ucl.ac.uk/software/paml.html
17
+ #
18
+
19
+ require 'bio/appl/paml/baseml'
20
+
21
+ module Bio::PAML
22
+ class Baseml
23
+
24
+ # UNDER CONSTRUCTION.
25
+ #
26
+ # Bio::PAML::Baseml::Report is a parser class for a baseml result.
27
+ #
28
+ class Report < Bio::PAML::Common::Report
29
+ end #class Report
30
+
31
+ end #class Baseml
32
+ end #module Bio::PAML
@@ -0,0 +1,242 @@
1
+ #
2
+ # = bio/appl/paml/codeml.rb - Wrapper for running PAML program codeml
3
+ #
4
+ # Copyright:: Copyright (C) 2008
5
+ # Michael D. Barton <mail@michaelbarton.me.uk>,
6
+ # Naohisa Goto <ng@bioruby.org>
7
+ #
8
+ # License:: The Ruby License
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains a wrapper for running the CODEML tool for estimating evolutionary rate
13
+ #
14
+ # == References
15
+ #
16
+ # * http://abacus.gene.ucl.ac.uk/software/paml.html
17
+ #
18
+
19
+ require 'bio/appl/paml/common'
20
+
21
+ module Bio
22
+ module PAML
23
+
24
+ # == Description
25
+ #
26
+ # Bio::PAML::Codeml is a wrapper for estimating evolutionary rate using the CODEML
27
+ # tool. The class provides methods for generating the necessary configuration
28
+ # file, and running codeml with the specified binary. Codeml output is
29
+ # returned when codeml is run. Bio::PAML::Codeml::Report and Bio::PAML::Codeml::Rates
30
+ # provide simple classes for parsing and accessing the Codeml report and
31
+ # rates files respectively.
32
+ #
33
+ # == Examples
34
+ #
35
+ # Example 1:
36
+ #
37
+ # require 'bio'
38
+ # # Reads multi-fasta formatted file and gets a Bio::Alignment object.
39
+ # alignment = Bio::FlatFile.open(Bio::Alignment::MultiFastaFormat,
40
+ # 'example.fst').alignment
41
+ # # Reads newick tree from a file
42
+ # tree = Bio::FlatFile.open(Bio::Newick, 'example.tree').tree
43
+ # # Creates a Codeml object
44
+ # codeml = Bio::PAML::Codeml.new
45
+ # # Sets parameters
46
+ # codeml.parameters[:runmode] = 0
47
+ # codeml.parameters[:RateAncestor] = 1
48
+ # # You can also set many parameters at a time.
49
+ # codeml.parameters.update({ :alpha => 0.5, :fix_alpha => 0 })
50
+ # # Executes codeml with the alignment and the tree
51
+ # report = codeml.query(alignment, tree)
52
+ #
53
+ # Example 2 (Obsolete usage):
54
+ #
55
+ # # Create a control file, setting some Codeml options
56
+ # # Default parameters are used otherwise, see RDoc for defaults
57
+ # # The names of the parameters correspond to those specified
58
+ # # in the Codeml documentation
59
+ # control_file = Tempfile.new('codeml_ctl')
60
+ # control_file.close(false)
61
+ # # Prepare output file as a temporary file
62
+ # output_file = Tempfile.new('codeml_test')
63
+ # output_file.close(false)
64
+ # Bio::PAML::Codeml.create_control_file({
65
+ # :model => 1,
66
+ # :fix_kappa => 1,
67
+ # :aaRatefile => TEST_DATA + '/wag.dat',
68
+ # :seqfile => TEST_DATA + '/abglobin.aa',
69
+ # :treefile => TEST_DATA + '/abglobin.trees',
70
+ # :outfile => output_file.path,
71
+ # }, control_file.path)
72
+ #
73
+ # # Create an instance of Codeml specifying where the codeml binary is
74
+ # codeml = Bio::PAML::Codeml.new('/path/to/codeml')
75
+ #
76
+ # # Run codeml using a control file
77
+ # # Returns the command line output
78
+ # codeml_output = codeml.run(control_file)
79
+ #
80
+ class Codeml < Common
81
+
82
+ autoload :Report, 'bio/appl/paml/codeml/report'
83
+ autoload :Rates, 'bio/appl/paml/codeml/rates'
84
+
85
+ # Default program name
86
+ DEFAULT_PROGRAM = 'codeml'.freeze
87
+
88
+ # Default parameters when running codeml.
89
+ #
90
+ # The parameters whose values are different from the codeml default
91
+ # value (described in pamlDOC.pdf) in PAML 4.1 are:
92
+ # seqfile, outfile, treefile, ndata, noisy, verbose, cleandata
93
+ #
94
+ DEFAULT_PARAMETERS = {
95
+ # Essential arguments
96
+ :seqfile => nil,
97
+ :outfile => nil,
98
+ # Optional arguments
99
+ :treefile => nil,
100
+ :noisy => 0,
101
+ :verbose => 1,
102
+ :runmode => 0,
103
+ :seqtype => 2,
104
+ :CodonFreq => 2,
105
+ :ndata => 1,
106
+ :clock => 0,
107
+ :aaDist => 0,
108
+ :aaRatefile => 'wag.dat',
109
+ :model => 2,
110
+ :NSsites => 0,
111
+ :icode => 0,
112
+ :Mgene => 0,
113
+ :fix_kappa => 0,
114
+ :kappa => 2,
115
+ :fix_omega => 0,
116
+ :omega => 0.4,
117
+ :fix_alpha => 0,
118
+ :alpha => 0.0,
119
+ :Malpha => 0,
120
+ :ncatG => 3,
121
+ :fix_rho => 1,
122
+ :rho => 0.0,
123
+ :getSE => 0,
124
+ :RateAncestor => 0,
125
+ :Small_Diff => 0.5e-6,
126
+ :cleandata => 1,
127
+ :fix_blength => 0,
128
+ :method => 0
129
+ }
130
+
131
+ # OBSOLETE. This method should not be used.
132
+ # Instead, use parameters.
133
+ def options
134
+ warn 'The method Codeml#options will be changed to be used for command line arguments in the future. Instead, use Codeml#parameters.'
135
+ parameters
136
+ end
137
+
138
+ # OBSOLETE. This method should not be used.
139
+ # Instead, use parameters=(hash).
140
+ def options=(hash)
141
+ warn 'The method Codeml#options=() will be changed to be used for command line arguments in the future. Instead, use Codeml#parameters=().'
142
+ self.parameters=(hash)
143
+ end
144
+
145
+ # Obsolete. This method will be removed in the future.
146
+ # Helper method for creating a codeml control file.
147
+ # Note that default parameters are automatically merged.
148
+ def self.create_control_file(parameters, filename)
149
+ parameters = DEFAULT_PARAMETERS.merge(parameters)
150
+ File.open(filename, 'w') do |file|
151
+ parameters.each do |key, value|
152
+ file.puts "#{key.to_s} = #{value.to_s}" if value
153
+ end
154
+ end
155
+ filename
156
+ end
157
+
158
+ # OBSOLETE. This method will soon be removed.
159
+ # Instead, use create_control_file(parameters, filename).
160
+ def self.create_config_file(parameters, filename)
161
+ warn "The method Codeml.create_config_file(parameters, filename) will soon be removed. Instead, use Codeml.create_control_file(filename, parameters)."
162
+ create_control_file(parameters, filename)
163
+ end
164
+
165
+
166
+ # Runs the program on the internal parameters with the specified
167
+ # sequence alignment and tree.
168
+ #
169
+ # Note that parameters[:seqfile] and parameters[:outfile]
170
+ # are always modified, and parameters[:treefile] and
171
+ # parameters[:aaRatefile] are modified when tree and aarate are
172
+ # specified respectively.
173
+ #
174
+ # For other important information, see the document of
175
+ # Bio::PAML::Common#query.
176
+ #
177
+ # ---
178
+ # *Arguments*:
179
+ # * (required) _alignment_: Bio::Alignment object or similar object
180
+ # * (optional) _tree_: Bio::Tree object
181
+ # * (optional) _aarate_: String or nil
182
+ # *Returns*:: Report object
183
+ def query(alignment, tree = nil, aarate = nil)
184
+ begin
185
+ aaratefile = prepare_aaratefile(aarate)
186
+ ret = super(alignment, tree)
187
+ ensure
188
+ finalize_aaratefile(aaratefile)
189
+ end
190
+ ret
191
+ end
192
+
193
+ # Runs the program on the internal parameters with the specified
194
+ # sequence alignment data string and tree data string.
195
+ #
196
+ # Note that parameters[:outfile] is always modified, and
197
+ # parameters[:seqfile], parameters[:treefile], and
198
+ # parameters[:aaRatefile] are modified when
199
+ # alignment, tree, and aarate are specified respectively.
200
+ #
201
+ # It raises RuntimeError if seqfile is not specified in the argument
202
+ # or in the parameter.
203
+ #
204
+ # For other important information, see the document of query method.
205
+ #
206
+ # ---
207
+ # *Arguments*:
208
+ # * (optional) _alignment_: String
209
+ # * (optional) _tree_: String or nil
210
+ # * (optional) _aarate_: String or nil
211
+ # *Returns*:: contents of output file (String)
212
+ def query_by_string(alignment = nil, tree = nil, aarate = nil)
213
+ begin
214
+ aaratefile = prepare_aaratefile(aarate)
215
+ ret = super(alignment, tree)
216
+ ensure
217
+ finalize_aaratefile(aaratefile)
218
+ end
219
+ ret
220
+ end
221
+
222
+ private
223
+
224
+ # (private) prepares temporary file for aaRatefile if needed
225
+ def prepare_aaratefile(aarate)
226
+ if aarate then
227
+ aaratefile = Tempfile.new('codeml_aarate')
228
+ aaratefile.print aarate
229
+ aaratefile.close(false)
230
+ self.parameters[:aaRatefile] = aaratefile.path
231
+ end
232
+ aaratefile
233
+ end
234
+
235
+ # (private) removes temporary file for aaRatefile if needed
236
+ def finalize_aaratefile(aaratefile)
237
+ aaratefile.close(true) if aaratefile
238
+ end
239
+
240
+ end # End class Codeml
241
+ end # End module PAML
242
+ end # End module Bio
@@ -0,0 +1,67 @@
1
+ #
2
+ # = bio/appl/paml/codeml/rates.rb - Codeml rates report file parser
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Michael D. Barton <mail@michaelbarton.me.uk>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # == Description
9
+ #
10
+ # This file contains a class that implements a simple interface to the Codeml rates estimation file
11
+ #
12
+ # == References
13
+ #
14
+ # * http://abacus.gene.ucl.ac.uk/software/paml.html
15
+ #
16
+
17
+ require 'delegate'
18
+ require 'bio/appl/paml/codeml'
19
+
20
+ module Bio::PAML
21
+ class Codeml
22
+
23
+ # == Description
24
+ #
25
+ # A simple class for parsing the codeml rates file.
26
+ #
27
+ # WARNING: The order of the parsed data should be correct, however will
28
+ # not necessarily correspond to the position in the alignment. For instance
29
+ # codeml ignores columns that contain gaps, and therefore there will not
30
+ # be any estimated rate data.
31
+ #
32
+ # == Usage
33
+ #
34
+ # site_rates = Bio::PAML::Codeml::Rates.new(File.open(@tmp_dir + "/rates").read)
35
+ # site_rates.first[:freq] # => Number of times that column appears
36
+ # site_rates[5][:rate] # => Estimated rate of evolution
37
+ # site_rates.last[:data] # => The content of the column, as a string
38
+ #
39
+ # # This class delegates to an array, so will respond to all array methods
40
+ # site_rates.max {|x,y| x[:rate] <=> y[:rate] } # => Fastest evolving column
41
+ # site_rates.detect {|x| x[:freq] > 1 } # => Columns appearing more than once
42
+ class Rates < DelegateClass(Array)
43
+
44
+ def initialize(rates)
45
+ super(parse_rates(rates))
46
+ end
47
+
48
+ private
49
+
50
+ def parse_rates(text)
51
+ re = /\s+(\d+)\s+(\d+)\s+([A-Z\*]+)\s+(\d+\.\d+)\s+(\d)/
52
+ array = Array.new
53
+ text.each_line do |line|
54
+ if re =~ line
55
+ match = Regexp.last_match
56
+ array[match[1].to_i] = {
57
+ :freq => match[2].to_i,
58
+ :data => match[3],
59
+ :rate => match[4].to_f }
60
+ end
61
+ end
62
+ array.compact
63
+ end
64
+
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,67 @@
1
+ #
2
+ # = bio/appl/paml/codeml/report.rb - Codeml report parser
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Michael D. Barton <mail@michaelbarton.me.uk>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # == Description
9
+ #
10
+ # This file contains a class that implements a simple interface to the Codeml output file
11
+ #
12
+ # == References
13
+ #
14
+ # * http://abacus.gene.ucl.ac.uk/software/paml.html
15
+ #
16
+
17
+ require 'bio/appl/paml/codeml'
18
+
19
+ module Bio::PAML
20
+ class Codeml
21
+
22
+ # == Description
23
+ #
24
+ # A simple class for parsing codeml output.
25
+ #
26
+ # WARNING: This data is parsed using a regex from the output file, and
27
+ # so will take the first result found. If using multiple trees, your
28
+ # mileage may vary. See the source for the regular expressions.
29
+ #
30
+ # require 'bio'
31
+ #
32
+ # report = Bio::PAML::Codeml::Report.new(File.open(codeml_output_file).read)
33
+ # report.alpha # => Rate of gene evolution as defined by alpha
34
+ # report.tree_length # => Estimated phylogenetic tree length
35
+ class Report < Bio::PAML::Common::Report
36
+
37
+ attr_reader :tree_log_likelihood, :tree_length, :alpha, :tree
38
+
39
+ def initialize(codeml_report)
40
+ @tree_log_likelihood = pull_tree_log_likelihood(codeml_report)
41
+ @tree_length = pull_tree_length(codeml_report)
42
+ @alpha = pull_alpha(codeml_report)
43
+ @tree = pull_tree(codeml_report)
44
+ end
45
+
46
+ private
47
+
48
+ def pull_tree_log_likelihood(text)
49
+ text[/lnL\(.+\):\s+(-?\d+(\.\d+)?)/,1].to_f
50
+ end
51
+
52
+
53
+ def pull_tree_length(text)
54
+ text[/tree length\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
55
+ end
56
+
57
+ def pull_alpha(text)
58
+ text[/alpha .+ =\s+(-?\d+(\.\d+)?)/,1].to_f
59
+ end
60
+
61
+ def pull_tree(text)
62
+ text[/([^\n]+)\n\nDetailed/m,1]
63
+ end
64
+
65
+ end # End Report
66
+ end # End Codeml
67
+ end # End Bio::PAML