bio 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +1712 -0
- data/KNOWN_ISSUES.rdoc +11 -1
- data/README.rdoc +3 -2
- data/RELEASE_NOTES.rdoc +65 -127
- data/bioruby.gemspec +38 -2
- data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
- data/doc/Tutorial.rd +74 -16
- data/doc/Tutorial.rd.html +68 -16
- data/lib/bio.rb +2 -0
- data/lib/bio/appl/clustalw/report.rb +18 -0
- data/lib/bio/appl/paml/codeml/report.rb +579 -21
- data/lib/bio/command.rb +149 -21
- data/lib/bio/db/aaindex.rb +11 -1
- data/lib/bio/db/embl/sptr.rb +1 -1
- data/lib/bio/db/fasta/defline.rb +7 -2
- data/lib/bio/db/fasta/qual.rb +24 -0
- data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
- data/lib/bio/db/fastq.rb +15 -0
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/db/kegg/common.rb +109 -5
- data/lib/bio/db/kegg/genes.rb +61 -15
- data/lib/bio/db/kegg/genome.rb +43 -38
- data/lib/bio/db/kegg/module.rb +158 -0
- data/lib/bio/db/kegg/orthology.rb +40 -1
- data/lib/bio/db/kegg/pathway.rb +254 -0
- data/lib/bio/db/medline.rb +6 -2
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/location.rb +39 -0
- data/lib/bio/reference.rb +24 -0
- data/lib/bio/sequence.rb +2 -0
- data/lib/bio/sequence/adapter.rb +1 -0
- data/lib/bio/sequence/format.rb +14 -0
- data/lib/bio/sequence/sequence_masker.rb +95 -0
- data/lib/bio/tree.rb +4 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
- data/lib/bio/version.rb +1 -1
- data/setup.rb +5 -0
- data/test/data/KEGG/K02338.orthology +180 -52
- data/test/data/KEGG/M00118.module +44 -0
- data/test/data/KEGG/T00005.genome +140 -0
- data/test/data/KEGG/T00070.genome +34 -0
- data/test/data/KEGG/b0529.gene +47 -0
- data/test/data/KEGG/ec00072.pathway +23 -0
- data/test/data/KEGG/hsa00790.pathway +59 -0
- data/test/data/KEGG/ko00312.pathway +16 -0
- data/test/data/KEGG/map00030.pathway +37 -0
- data/test/data/KEGG/map00052.pathway +13 -0
- data/test/data/KEGG/rn00250.pathway +114 -0
- data/test/data/clustalw/example1.aln +58 -0
- data/test/data/go/selected_component.ontology +12 -0
- data/test/data/go/selected_gene_association.sgd +31 -0
- data/test/data/go/selected_wikipedia2go +13 -0
- data/test/data/medline/20146148_modified.medline +54 -0
- data/test/data/paml/codeml/models/aa.aln +26 -0
- data/test/data/paml/codeml/models/aa.dnd +13 -0
- data/test/data/paml/codeml/models/aa.ph +13 -0
- data/test/data/paml/codeml/models/alignment.phy +49 -0
- data/test/data/paml/codeml/models/results0-3.txt +312 -0
- data/test/data/paml/codeml/models/results7-8.txt +340 -0
- data/test/functional/bio/io/test_togows.rb +8 -8
- data/test/functional/bio/test_command.rb +7 -6
- data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
- data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
- data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1 -1
- data/test/unit/bio/db/fasta/test_defline.rb +160 -0
- data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
- data/test/unit/bio/db/kegg/test_genes.rb +281 -1
- data/test/unit/bio/db/kegg/test_genome.rb +408 -0
- data/test/unit/bio/db/kegg/test_module.rb +246 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
- data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
- data/test/unit/bio/db/test_aaindex.rb +8 -7
- data/test/unit/bio/db/test_fastq.rb +36 -0
- data/test/unit/bio/db/test_go.rb +171 -0
- data/test/unit/bio/db/test_medline.rb +148 -0
- data/test/unit/bio/db/test_qual.rb +9 -2
- data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
- data/test/unit/bio/test_tree.rb +260 -1
- data/test/unit/bio/util/test_contingency_table.rb +7 -7
- metadata +53 -6
data/doc/Tutorial.rd
CHANGED
@@ -21,8 +21,12 @@
|
|
21
21
|
# cat Tutorial.rd | sed -e "s,bioruby>,>>," | sed "s,==>,=>," > Tutorial.rd.tmp
|
22
22
|
# rubydoctest Tutorial.rd.tmp
|
23
23
|
#
|
24
|
-
#
|
24
|
+
# alternatively, the Ruby way is
|
25
25
|
#
|
26
|
+
# ruby -p -e '$_.sub!(/bioruby\>/, ">>"); $_.sub!(/\=\=\>/, "=>")' Tutorial.rd > Tutorial.rd.tmp
|
27
|
+
# rubydoctest Tutorial.rd.tmp
|
28
|
+
#
|
29
|
+
# Rubydoctest is useful to verify an example in this document (still) works
|
26
30
|
#
|
27
31
|
#
|
28
32
|
|
@@ -34,9 +38,9 @@ bioruby> $: << '../lib'
|
|
34
38
|
= BioRuby Tutorial
|
35
39
|
|
36
40
|
* Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
|
37
|
-
* Copyright (C) 2005-
|
41
|
+
* Copyright (C) 2005-2010 Pjotr Prins, Naohisa Goto and others
|
38
42
|
|
39
|
-
This document was last modified:
|
43
|
+
This document was last modified: 2010/01/08
|
40
44
|
Current editor: Pjotr Prins <p .at. bioruby.org>
|
41
45
|
|
42
46
|
The latest version resides in the GIT source code repository: ./doc/((<Tutorial.rd|URL:http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd>)).
|
@@ -46,8 +50,8 @@ The latest version resides in the GIT source code repository: ./doc/((<Tutorial
|
|
46
50
|
This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
47
51
|
If you want to know more about the programming language Ruby we recommend the
|
48
52
|
latest Ruby book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
|
49
|
-
by Dave Thomas and Andy Hunt -
|
50
|
-
((<here|URL:http://www.
|
53
|
+
by Dave Thomas and Andy Hunt - the first edition is online
|
54
|
+
((<here|URL:http://www.ruby-doc.org/docs/ProgrammingRuby/>)).
|
51
55
|
|
52
56
|
For BioRuby you need to install Ruby and the BioRuby package on your computer
|
53
57
|
|
@@ -64,8 +68,13 @@ If you see no such thing you'll have to install Ruby using your installation
|
|
64
68
|
manager. For more information see the
|
65
69
|
((<Ruby|URL:http://www.ruby-lang.org/en/>)) website.
|
66
70
|
|
67
|
-
|
68
|
-
((<Bioruby|URL:http://bioruby.org/>)) website.
|
71
|
+
With Ruby download and install Bioruby using the links on the
|
72
|
+
((<Bioruby|URL:http://bioruby.org/>)) website. The recommended installation is via
|
73
|
+
Ruby gems:
|
74
|
+
|
75
|
+
gem install bio
|
76
|
+
|
77
|
+
See also the Bioruby ((<wiki|URL:http://bioruby.open-bio.org/wiki/Installation>)).
|
69
78
|
|
70
79
|
A lot of BioRuby's documentation exists in the source code and unit tests. To
|
71
80
|
really dive in you will need the latest source code tree. The embedded rdoc
|
@@ -1211,7 +1220,64 @@ Bio::Fetch.query method.)
|
|
1211
1220
|
|
1212
1221
|
== BioSQL
|
1213
1222
|
|
1214
|
-
to
|
1223
|
+
BioSQL is a well-known schema to store and retrieve biological sequences using an RDBMS like PostgreSQL or MySQL; note that SQLite is not supported.
|
1224
|
+
First of all, you must install a database engine or have access to a remote one. Then create the schema and populate with the taxonomy. You can follow the ((<Official Guide|URL:http://code.open-bio.org/svnweb/index.cgi/biosql/view/biosql-schema/trunk/INSTALL>)) .
|
1225
|
+
Next step is to install these gems:
|
1226
|
+
* ActiveRecord
|
1227
|
+
* CompositePrimaryKeys (Rails doesn't handle by default composite primary keys)
|
1228
|
+
* The layer to communicate with your preferred RDBMS (postgresql, mysql, jdbcmysql in case you are running JRuby)
|
1229
|
+
|
1230
|
+
|
1231
|
+
You can find ActiveRecord's models in /bioruby/lib/bio/io/biosql
|
1232
|
+
|
1233
|
+
When you have your database up and running, you can connect to it in this way:
|
1234
|
+
|
1235
|
+
#!/usr/bin/env ruby
|
1236
|
+
|
1237
|
+
require 'bio'
|
1238
|
+
|
1239
|
+
connection = Bio::SQL.establish_connection({'development'=>{'hostname'=>"YourHostname",
|
1240
|
+
'database'=>"CoolBioSeqDB",
|
1241
|
+
'adapter'=>"jdbcmysql",
|
1242
|
+
'username'=>"YourUser",
|
1243
|
+
'password'=>"YouPassword"
|
1244
|
+
}
|
1245
|
+
},
|
1246
|
+
'development')
|
1247
|
+
|
1248
|
+
#The first parameter is the hash containing the description of the configuration, similar to database.yml in a Rails application; you can declare different environments. The second parameter is the environment to use: 'development', 'test', 'production'.
|
1249
|
+
|
1250
|
+
#To store a sequence into the database you simply need a biosequence object.
|
1251
|
+
biosql_database = Bio::SQL::Biodatabase.find(:first)
|
1252
|
+
ff = Bio::GenBank.open("gbvrl1.seq")
|
1253
|
+
|
1254
|
+
ff.each_entry do |gb|
|
1255
|
+
Bio::SQL::Sequence.new(:biosequence=>gb.to_biosequence, :biodatabase=>biosql_database)
|
1256
|
+
end
|
1257
|
+
|
1258
|
+
#You can list all the entries into every database
|
1259
|
+
Bio::SQL.list_entries
|
1260
|
+
|
1261
|
+
#list databases:
|
1262
|
+
Bio::SQL.list_databases
|
1263
|
+
|
1264
|
+
#retrieving a generic accession
|
1265
|
+
bioseq = Bio::SQL.fetch_accession("YouAccession")
|
1266
|
+
|
1267
|
+
#If you use biosequence objects, you will find all their methods mapped to BioSQL sequences. But you can also access the models directly:
|
1268
|
+
|
1269
|
+
#get the raw sequence associated with your accession
|
1270
|
+
bioseq.entry.biosequence
|
1271
|
+
|
1272
|
+
#get the length of your sequence, this is the explicit form of bioseq.length
|
1273
|
+
bioseq.entry.biosequence.length
|
1274
|
+
|
1275
|
+
#convert the sequence in GenBank format
|
1276
|
+
bioseq.to_biosequence.output(:genbank)
|
1277
|
+
|
1278
|
+
BioSQL's ((<schema|URL:http://www.biosql.org/wiki/Schema_Overview>)) is not so intuitive at the beginning, spend some time on understanding it, in the end if you know a little bit of rails everything will go smoothly. You can find information on Annotation ((<here|URL:http://www.biosql.org/wiki/Annotation_Mapping>)).
|
1279
|
+
ToDo: add examples from George. I remember he did some cool posts on BioSQL and Rails.
|
1280
|
+
|
1215
1281
|
|
1216
1282
|
= PhyloXML
|
1217
1283
|
|
@@ -1400,14 +1466,6 @@ Gene Ontologies can be fetched through the Ruby Ensembl API package:
|
|
1400
1466
|
Prints each mosq. accession/uniq identifier and the GO terms from the Drosophila
|
1401
1467
|
homologues.
|
1402
1468
|
|
1403
|
-
== Comparing BioProjects
|
1404
|
-
|
1405
|
-
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
1406
|
-
|
1407
|
-
== Using BioRuby with R
|
1408
|
-
|
1409
|
-
Using Ruby with R Pjotr wrote a section on SciRuby. See ((<URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang>))
|
1410
|
-
|
1411
1469
|
== Using BioPerl or BioPython from Ruby
|
1412
1470
|
|
1413
1471
|
At the moment there is no easy way of accessing BioPerl from Ruby. The best way, perhaps, is to create a Perl server that gets accessed through XML/RPC or SOAP.
|
data/doc/Tutorial.rd.html
CHANGED
@@ -11,17 +11,17 @@
|
|
11
11
|
<h1><a name="label-0" id="label-0">BioRuby Tutorial</a></h1><!-- RDLabel: "BioRuby Tutorial" -->
|
12
12
|
<ul>
|
13
13
|
<li>Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org></li>
|
14
|
-
<li>Copyright (C) 2005-
|
14
|
+
<li>Copyright (C) 2005-2010 Pjotr Prins, Naohisa Goto and others</li>
|
15
15
|
</ul>
|
16
|
-
<p>This document was last modified:
|
16
|
+
<p>This document was last modified: 2010/01/08
|
17
17
|
Current editor: Pjotr Prins <p .at. bioruby.org></p>
|
18
18
|
<p>The latest version resides in the GIT source code repository: ./doc/<a href="http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd">Tutorial.rd</a>.</p>
|
19
19
|
<h2><a name="label-1" id="label-1">Introduction</a></h2><!-- RDLabel: "Introduction" -->
|
20
20
|
<p>This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
21
21
|
If you want to know more about the programming language Ruby we recommend the
|
22
22
|
latest Ruby book <a href="http://www.pragprog.com/titles/ruby">Programming Ruby</a>
|
23
|
-
by Dave Thomas and Andy Hunt -
|
24
|
-
<a href="http://www.
|
23
|
+
by Dave Thomas and Andy Hunt - the first edition is online
|
24
|
+
<a href="http://www.ruby-doc.org/docs/ProgrammingRuby/">here</a>.</p>
|
25
25
|
<p>For BioRuby you need to install Ruby and the BioRuby package on your computer</p>
|
26
26
|
<p>You can check whether Ruby is installed on your computer and what
|
27
27
|
version it has with the</p>
|
@@ -31,8 +31,11 @@ version it has with the</p>
|
|
31
31
|
<p>If you see no such thing you'll have to install Ruby using your installation
|
32
32
|
manager. For more information see the
|
33
33
|
<a href="http://www.ruby-lang.org/en/">Ruby</a> website.</p>
|
34
|
-
<p>
|
35
|
-
<a href="http://bioruby.org/">Bioruby</a> website
|
34
|
+
<p>With Ruby download and install Bioruby using the links on the
|
35
|
+
<a href="http://bioruby.org/">Bioruby</a> website. The recommended installation is via
|
36
|
+
Ruby gems:</p>
|
37
|
+
<pre>gem install bio</pre>
|
38
|
+
<p>See also the Bioruby <a href="http://bioruby.open-bio.org/wiki/Installation">wiki</a>.</p>
|
36
39
|
<p>A lot of BioRuby's documentation exists in the source code and unit tests. To
|
37
40
|
really dive in you will need the latest source code tree. The embedded rdoc
|
38
41
|
documentation can be viewed online at
|
@@ -946,7 +949,60 @@ Because the KEGG/GENES database and AAindex database are not available
|
|
946
949
|
from other BioFetch servers, we used bioruby.org server with
|
947
950
|
Bio::Fetch.query method.)</p>
|
948
951
|
<h2><a name="label-22" id="label-22">BioSQL</a></h2><!-- RDLabel: "BioSQL" -->
|
949
|
-
<p>to
|
952
|
+
<p>BioSQL is a well known schema to store and retrive biological sequences using a RDBMS like PostgreSQL or MySQL; note that SQLite is not supported.
|
953
|
+
First of all, you must install a database engine or have access to a remote one. Then create the schema and populate with the taxonomy. You can follow the <a href="http://code.open-bio.org/svnweb/index.cgi/biosql/view/biosql-schema/trunk/INSTALL">Official Guide</a> .
|
954
|
+
Next step is to install these gems:</p>
|
955
|
+
<ul>
|
956
|
+
<li>ActiveRecord</li>
|
957
|
+
<li>CompositePrimaryKeys (Rails doesn't handle by default composite primary keys)</li>
|
958
|
+
<li>The layer to comunicate with you preferred RDBMS (postgresql, mysql, jdbcmysql in case you are running JRuby )</li>
|
959
|
+
</ul>
|
960
|
+
<p>You can find ActiveRecord's models in /bioruby/lib/bio/io/biosql</p>
|
961
|
+
<p>When you have your database up and running, you can connect to it in this way:</p>
|
962
|
+
<pre>#!/usr/bin/env ruby
|
963
|
+
|
964
|
+
require 'bio'
|
965
|
+
|
966
|
+
connection = Bio::SQL.establish_connection({'development'=>{'hostname'=>"YourHostname",
|
967
|
+
'database'=>"CoolBioSeqDB",
|
968
|
+
'adapter'=>"jdbcmysql",
|
969
|
+
'username'=>"YourUser",
|
970
|
+
'password'=>"YouPassword"
|
971
|
+
}
|
972
|
+
},
|
973
|
+
'development')
|
974
|
+
|
975
|
+
#The first parameter is the hash containing the description of the configuration, similar to database.yml in a Rails application; you can declare different environments. The second parameter is the environment to use: 'development', 'test', 'production'.
|
976
|
+
|
977
|
+
#To store a sequence into the database you simply need a biosequence object.
|
978
|
+
biosql_database = Bio::SQL::Biodatabase.find(:first)
|
979
|
+
ff = Bio::GenBank.open("gbvrl1.seq")
|
980
|
+
|
981
|
+
ff.each_entry do |gb|
|
982
|
+
Bio::SQL::Sequence.new(:biosequence=>gb.to_biosequence, :biodatabase=>biosql_database)
|
983
|
+
end
|
984
|
+
|
985
|
+
#You can list all the entries into every database
|
986
|
+
Bio::SQL.list_entries
|
987
|
+
|
988
|
+
#list databases:
|
989
|
+
Bio::SQL.list_databases
|
990
|
+
|
991
|
+
#retrieving a generic accession
|
992
|
+
bioseq = Bio::SQL.fetch_accession("YouAccession")
|
993
|
+
|
994
|
+
#If you use biosequence objects, you will find all their methods mapped to BioSQL sequences. But you can also access the models directly:
|
995
|
+
|
996
|
+
#get the raw sequence associated with your accession
|
997
|
+
bioseq.entry.biosequence
|
998
|
+
|
999
|
+
#get the length of your sequence, this is the explicit form of bioseq.length
|
1000
|
+
bioseq.entry.biosequence.length
|
1001
|
+
|
1002
|
+
#convert the sequence in GenBank format
|
1003
|
+
bioseq.to_biosequence.output(:genbank)</pre>
|
1004
|
+
<p>BioSQL' <a href="http://www.biosql.org/wiki/Schema_Overview">schema</a> is not so intuitive at the beginning, spend some time on understanding it, in the end if you know a little bit of rails everything will go smootly. You can find information to Annotation <a href="http://www.biosql.org/wiki/Annotation_Mapping">here</a>
|
1005
|
+
ToDo: add examples from George. I remember he did some cool posts on BioSQL and Rails.</p>
|
950
1006
|
<h1><a name="label-23" id="label-23">PhyloXML</a></h1><!-- RDLabel: "PhyloXML" -->
|
951
1007
|
<p>PhyloXML is an XML language for saving, analyzing and exchanging data of
|
952
1008
|
annotated phylogenetic trees. PhyloXML parser in BioRuby is implemented in
|
@@ -1087,13 +1143,9 @@ infile.each do |line|
|
|
1087
1143
|
end</pre>
|
1088
1144
|
<p>Prints each mosq. accession/uniq identifier and the GO terms from the Drosphila
|
1089
1145
|
homologues.</p>
|
1090
|
-
<h2><a name="label-38" id="label-38">
|
1091
|
-
<p>For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see <a href="http://sciruby.codeforpeople.com/sr.cgi/BioProjects"><URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects></a></p>
|
1092
|
-
<h2><a name="label-39" id="label-39">Using BioRuby with R</a></h2><!-- RDLabel: "Using BioRuby with R" -->
|
1093
|
-
<p>Using Ruby with R Pjotr wrote a section on SciRuby. See <a href="http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang"><URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang></a></p>
|
1094
|
-
<h2><a name="label-40" id="label-40">Using BioPerl or BioPython from Ruby</a></h2><!-- RDLabel: "Using BioPerl or BioPython from Ruby" -->
|
1146
|
+
<h2><a name="label-38" id="label-38">Using BioPerl or BioPython from Ruby</a></h2><!-- RDLabel: "Using BioPerl or BioPython from Ruby" -->
|
1095
1147
|
<p>At the moment there is no easy way of accessing BioPerl from Ruby. The best way, perhaps, is to create a Perl server that gets accessed through XML/RPC or SOAP.</p>
|
1096
|
-
<h2><a name="label-
|
1148
|
+
<h2><a name="label-39" id="label-39">Installing required external library</a></h2><!-- RDLabel: "Installing required external library" -->
|
1097
1149
|
<p>At this point for using BioRuby no additional libraries are needed, except if
|
1098
1150
|
you are using Bio::PhyloXML module. Then you have to install libxml-ruby.</p>
|
1099
1151
|
<p>This may change, so keep an eye on the Bioruby website. Also when
|
@@ -1102,20 +1154,20 @@ a package is missing BioRuby should show an informative message.</p>
|
|
1102
1154
|
painful, as the gem standard for packages evolved late and some still
|
1103
1155
|
force you to copy things by hand. Therefore read the README's
|
1104
1156
|
carefully that come with each package.</p>
|
1105
|
-
<h3><a name="label-
|
1157
|
+
<h3><a name="label-40" id="label-40">Installing libxml-ruby</a></h3><!-- RDLabel: "Installing libxml-ruby" -->
|
1106
1158
|
<p>The simplest way is to use gem packaging system.</p>
|
1107
1159
|
<pre>gem install -r libxml-ruby</pre>
|
1108
1160
|
<p>If you get `require': no such file to load - mkmf (LoadError) error then do</p>
|
1109
1161
|
<pre>sudo apt-get install ruby-dev</pre>
|
1110
1162
|
<p>If you have other problems with installation, then see <a href="http://libxml.rubyforge.org/install.xml"><URL:http://libxml.rubyforge.org/install.xml></a> </p>
|
1111
|
-
<h2><a name="label-
|
1163
|
+
<h2><a name="label-41" id="label-41">Trouble shooting</a></h2><!-- RDLabel: "Trouble shooting" -->
|
1112
1164
|
<ul>
|
1113
1165
|
<li>Error: in `require': no such file to load -- bio (LoadError)</li>
|
1114
1166
|
</ul>
|
1115
1167
|
<p>Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
|
1116
1168
|
it to the interpreter. For example:</p>
|
1117
1169
|
<pre>ruby -I$BIORUBYPATH/lib yourprogram.rb</pre>
|
1118
|
-
<h2><a name="label-
|
1170
|
+
<h2><a name="label-42" id="label-42">Modifying this page</a></h2><!-- RDLabel: "Modifying this page" -->
|
1119
1171
|
<p>IMPORTANT NOTICE: This page is maintained in the BioRuby source code
|
1120
1172
|
repository. Please edit the file there otherwise changes may get
|
1121
1173
|
lost. See <!-- Reference, RDLabel "BioRuby Developer Information" doesn't exist --><em class="label-not-found">BioRuby Developer Information</em><!-- Reference end --> for repository and mailing list
|
data/lib/bio.rb
CHANGED
@@ -107,6 +107,8 @@ module Bio
|
|
107
107
|
autoload :EXPRESSION, 'bio/db/kegg/expression'
|
108
108
|
autoload :ORTHOLOGY, 'bio/db/kegg/orthology'
|
109
109
|
autoload :KGML, 'bio/db/kegg/kgml'
|
110
|
+
autoload :PATHWAY, 'bio/db/kegg/pathway'
|
111
|
+
autoload :MODULE, 'bio/db/kegg/module'
|
110
112
|
autoload :Taxonomy, 'bio/db/kegg/taxonomy'
|
111
113
|
end
|
112
114
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# = bio/appl/clustalw/report.rb - CLUSTAL W format data (*.aln) class
|
3
3
|
#
|
4
4
|
# Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
5
|
+
# Copyright (C) 2010 Pjotr Prins <pjotr.prins@thebird.nl>
|
6
|
+
#
|
5
7
|
# License:: The Ruby License
|
6
8
|
#
|
7
9
|
# $Id: report.rb,v 1.13 2007/07/18 08:47:39 ngoto Exp $
|
@@ -72,6 +74,22 @@ module Bio
|
|
72
74
|
@header or (do_parse or @header)
|
73
75
|
end
|
74
76
|
|
77
|
+
# Returns the Bio::Sequence in the matrix at row 'row' as
|
78
|
+
# Bio::Sequence object. When _row_ is out of range a nil is returned.
|
79
|
+
# ---
|
80
|
+
# *Arguments*:
|
81
|
+
# * (required) _row_: Integer
|
82
|
+
# *Returns*:: Bio::Sequence
|
83
|
+
def get_sequence(row)
|
84
|
+
a = alignment
|
85
|
+
return nil if row < 0 or row >= a.keys.size
|
86
|
+
id = a.keys[row]
|
87
|
+
seq = a.to_hash[id]
|
88
|
+
s = Bio::Sequence.new(seq.seq)
|
89
|
+
s.definition = id
|
90
|
+
s
|
91
|
+
end
|
92
|
+
|
75
93
|
# Shows "match line" of CLUSTAL's alignment result, for example,
|
76
94
|
# ':* :* .* * .*::*. ** :* . * . '.
|
77
95
|
# Returns a string.
|
@@ -1,41 +1,369 @@
|
|
1
1
|
#
|
2
2
|
# = bio/appl/paml/codeml/report.rb - Codeml report parser
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2008
|
4
|
+
# Copyright:: Copyright (C) 2008-2010
|
5
|
+
# Michael D. Barton <mail@michaelbarton.me.uk>,
|
6
|
+
# Pjotr Prins <pjotr.prins@thebird.nl>
|
5
7
|
#
|
6
8
|
# License:: The Ruby License
|
7
9
|
#
|
8
|
-
# == Description
|
9
|
-
#
|
10
|
-
# This file contains a class that implement a simple interface to Codeml output file
|
11
|
-
#
|
12
|
-
# == References
|
13
|
-
#
|
14
|
-
# * http://abacus.gene.ucl.ac.uk/software/paml.html
|
15
|
-
#
|
16
10
|
|
17
11
|
require 'bio/appl/paml/codeml'
|
18
12
|
|
19
13
|
module Bio::PAML
|
14
|
+
|
20
15
|
class Codeml
|
21
16
|
|
22
17
|
# == Description
|
23
18
|
#
|
24
|
-
#
|
19
|
+
# Run PAML codeml and get the results from the output file. The
|
20
|
+
# Codeml::Report object is returned by Bio::PAML::Codeml.query. For
|
21
|
+
# example
|
22
|
+
#
|
23
|
+
# codeml = Bio::PAML::Codeml.new('codeml', :runmode => 0,
|
24
|
+
# :RateAncestor => 1, :alpha => 0.5, :fix_alpha => 0)
|
25
|
+
# result = codeml.query(alignment, tree)
|
26
|
+
#
|
27
|
+
# where alignment and tree are Bioruby objects. This class assumes we have a
|
28
|
+
# buffer containing the output of codeml.
|
29
|
+
#
|
30
|
+
# == References
|
31
|
+
#
|
32
|
+
# Phylogenetic Analysis by Maximum Likelihood (PAML) is a package of
|
33
|
+
# programs for phylogenetic analyses of DNA or protein sequences using
|
34
|
+
# maximum likelihood. It is maintained and distributed for academic use
|
35
|
+
# free of charge by Ziheng Yang. Suggested citation:
|
36
|
+
#
|
37
|
+
# Yang, Z. 1997
|
38
|
+
# PAML: a program package for phylogenetic analysis by maximum likelihood
|
39
|
+
# CABIOS 13:555-556
|
40
|
+
#
|
41
|
+
# http://abacus.gene.ucl.ac.uk/software/paml.html
|
42
|
+
#
|
43
|
+
# == Examples
|
44
|
+
#
|
45
|
+
#--
|
46
|
+
# The following is not shown in the documentation
|
47
|
+
#
|
48
|
+
# >> require 'bio'
|
49
|
+
# >> require 'bio/test/biotestfile'
|
50
|
+
# >> buf = BioTestFile.read('paml/codeml/models/results0-3.txt')
|
51
|
+
#++
|
52
|
+
#
|
53
|
+
# Invoke Bioruby's PAML codeml parser, after having read the contents
|
54
|
+
# of the codeml result file into _buf_ (for example using File.read)
|
55
|
+
#
|
56
|
+
# >> c = Bio::PAML::Codeml::Report.new(buf)
|
57
|
+
#
|
58
|
+
# Do we have two models?
|
59
|
+
#
|
60
|
+
# >> c.models.size
|
61
|
+
# => 2
|
62
|
+
# >> c.models[0].name
|
63
|
+
# => "M0"
|
64
|
+
# >> c.models[1].name
|
65
|
+
# => "M3"
|
66
|
+
#
|
67
|
+
# Check the general information
|
68
|
+
#
|
69
|
+
# >> c.num_sequences
|
70
|
+
# => 6
|
71
|
+
# >> c.num_codons
|
72
|
+
# => 134
|
73
|
+
# >> c.descr
|
74
|
+
# => "M0-3"
|
75
|
+
#
|
76
|
+
# Test whether the second model M3 is significant over M0
|
77
|
+
#
|
78
|
+
# >> c.significant
|
79
|
+
# => true
|
80
|
+
#
|
81
|
+
# Now fetch the results of the first model M0, and check its values
|
82
|
+
#
|
83
|
+
# >> m0 = c.models[0]
|
84
|
+
# >> m0.tree_length
|
85
|
+
# => 1.90227
|
86
|
+
# >> m0.lnL
|
87
|
+
# => -1125.800375
|
88
|
+
# >> m0.omega
|
89
|
+
# => 0.58589
|
90
|
+
# >> m0.dN_dS
|
91
|
+
# => 0.58589
|
92
|
+
# >> m0.kappa
|
93
|
+
# => 2.14311
|
94
|
+
# >> m0.alpha
|
95
|
+
# => nil
|
96
|
+
#
|
97
|
+
# We also have a tree (as a string)
|
98
|
+
#
|
99
|
+
# >> m0.tree
|
100
|
+
# => "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.400074): 0.000004, PITG_23257T0: 0.952614): 0.000004, PITG_23264T0: 0.445507): 0.000004, PITG_23267T0: 0.011814, PITG_23293T0: 0.092242);"
|
101
|
+
#
|
102
|
+
# Check the M3 and its specific values
|
103
|
+
#
|
104
|
+
# >> m3 = c.models[1]
|
105
|
+
# >> m3.lnL
|
106
|
+
# => -1070.964046
|
107
|
+
# >> m3.classes.size
|
108
|
+
# => 3
|
109
|
+
# >> m3.classes[0]
|
110
|
+
# => {:w=>0.00928, :p=>0.56413}
|
111
|
+
#
|
112
|
+
# And the tree
|
113
|
+
#
|
114
|
+
# >> m3.tree
|
115
|
+
# => "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.762597): 0.000004, PITG_23257T0: 2.721710): 0.000004, PITG_23264T0: 0.924326): 0.014562, PITG_23267T0: 0.000004, PITG_23293T0: 0.237433);"
|
116
|
+
#
|
117
|
+
# Next take the overall posterior analysis
|
118
|
+
#
|
119
|
+
# >> c.nb_sites.size
|
120
|
+
# => 44
|
121
|
+
# >> c.nb_sites[0].to_a
|
122
|
+
# => [17, "I", 0.988, 3.293]
|
123
|
+
#
|
124
|
+
# or by field
|
125
|
+
#
|
126
|
+
# >> codon = c.nb_sites[0]
|
127
|
+
# >> codon.position
|
128
|
+
# => 17
|
129
|
+
# >> codon.probability
|
130
|
+
# => 0.988
|
131
|
+
# >> codon.dN_dS
|
132
|
+
# => 3.293
|
133
|
+
#
|
134
|
+
# with aliases
|
135
|
+
#
|
136
|
+
# >> codon.p
|
137
|
+
# => 0.988
|
138
|
+
# >> codon.w
|
139
|
+
# => 3.293
|
140
|
+
#
|
141
|
+
# Now we generate special string 'graph' for positive selection. The
|
142
|
+
# following returns a string the length of the input alignment and
|
143
|
+
# shows the locations of positive selection:
|
144
|
+
#
|
145
|
+
# >> c.nb_sites.graph[0..32]
|
146
|
+
# => " ** * * *"
|
147
|
+
#
|
148
|
+
# And with dN/dS (high values are still an asterisk *)
|
149
|
+
#
|
150
|
+
# >> c.nb_sites.graph_omega[0..32]
|
151
|
+
# => " 3* 6 6 2"
|
152
|
+
#
|
153
|
+
# We also provide the raw buffers to adhere to the principle of
|
154
|
+
# unexpected use. Test the raw buffers for content:
|
155
|
+
#
|
156
|
+
# >> c.header.to_s =~ /seed/
|
157
|
+
# => 1
|
158
|
+
# >> m0.to_s =~ /one-ratio/
|
159
|
+
# => 3
|
160
|
+
# >> m3.to_s =~ /discrete/
|
161
|
+
# => 3
|
162
|
+
# >> c.footer.to_s =~ /Bayes/
|
163
|
+
# => 16
|
25
164
|
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# milage may vary. See the source for the regular expressions.
|
165
|
+
# Finally we do a test on an M7+M8 run. Again, after loading the
|
166
|
+
# results file into _buf_
|
29
167
|
#
|
30
|
-
|
168
|
+
#--
|
169
|
+
# >> buf78 = BioTestFile.read('paml/codeml/models/results7-8.txt')
|
170
|
+
#
|
171
|
+
#
|
172
|
+
#++
|
173
|
+
#
|
174
|
+
# Invoke Bioruby's PAML codeml parser
|
175
|
+
#
|
176
|
+
# >> c = Bio::PAML::Codeml::Report.new(buf78)
|
177
|
+
#
|
178
|
+
# Do we have two models?
|
179
|
+
#
|
180
|
+
# >> c.models.size
|
181
|
+
# => 2
|
182
|
+
# >> c.models[0].name
|
183
|
+
# => "M7"
|
184
|
+
# >> c.models[1].name
|
185
|
+
# => "M8"
|
186
|
+
#
|
187
|
+
# Assert the results are significant
|
188
|
+
#
|
189
|
+
# >> c.significant
|
190
|
+
# => true
|
191
|
+
#
|
192
|
+
# Compared to M0/M3 there are some differences. The important ones
|
193
|
+
# are the parameters and the full Bayesian result available for M7/M8.
|
194
|
+
# This is the naive Bayesian result:
|
195
|
+
#
|
196
|
+
# >> c.nb_sites.size
|
197
|
+
# => 10
|
198
|
+
#
|
199
|
+
# And this is the full Bayesian result:
|
200
|
+
#
|
201
|
+
# >> c.sites.size
|
202
|
+
# => 30
|
203
|
+
# >> c.sites[0].to_a
|
204
|
+
# => [17, "I", 0.672, 2.847]
|
205
|
+
# >> c.sites.graph[0..32]
|
206
|
+
# => " ** * * *"
|
207
|
+
#
|
208
|
+
# Note the differences of omega with earlier M0-M3 naive Bayesian
|
209
|
+
# analysis:
|
210
|
+
#
|
211
|
+
# >> c.sites.graph_omega[0..32]
|
212
|
+
# => " 24 3 3 2"
|
213
|
+
#
|
214
|
+
# The locations are the same, but the omega differs.
|
31
215
|
#
|
32
|
-
# report = Bio::PAML::Codeml::Report.new(File.open(codeml_output_file).read)
|
33
|
-
# report.gene_rate # => Rate of gene evolution as defined be alpha
|
34
|
-
# report.tree_lengh # => Estimated phylogetic tree length
|
35
216
|
class Report < Bio::PAML::Common::Report
|
36
217
|
|
218
|
+
attr_reader :models, :header, :footer
|
219
|
+
|
220
|
+
# Parse codeml output file passed with +buf+, where buf contains
|
221
|
+
# the content of a codeml result file
|
222
|
+
def initialize buf
|
223
|
+
# split the main buffer into sections for each model, header and footer.
|
224
|
+
sections = buf.split("\nModel ")
|
225
|
+
model_num = sections.size-1
|
226
|
+
raise ReportError,"Incorrect codeml data models=#{model_num}" if model_num > 2
|
227
|
+
foot2 = sections[model_num].split("\nNaive ")
|
228
|
+
if foot2.size == 2
|
229
|
+
# We have a dual model
|
230
|
+
sections[model_num] = foot2[0]
|
231
|
+
@footer = 'Naive '+foot2[1]
|
232
|
+
@models = []
|
233
|
+
sections[1..-1].each do | model_buf |
|
234
|
+
@models.push Model.new(model_buf)
|
235
|
+
end
|
236
|
+
else
|
237
|
+
# A single model is run
|
238
|
+
sections = buf.split("\nTREE #")
|
239
|
+
model_num = sections.size-1
|
240
|
+
raise ReportError,"Can not parse single model file" if model_num != 1
|
241
|
+
@models = []
|
242
|
+
@models.push sections[1]
|
243
|
+
@footer = sections[1][/Time used/,1]
|
244
|
+
@single = ReportSingle.new(buf)
|
245
|
+
end
|
246
|
+
@header = sections[0]
|
247
|
+
end
|
248
|
+
|
249
|
+
# Give a short description of the models, for example 'M0-3'
|
250
|
+
def descr
|
251
|
+
num = @models.size
|
252
|
+
case num
|
253
|
+
when 0
|
254
|
+
'No model'
|
255
|
+
when 1
|
256
|
+
@models[0].name
|
257
|
+
else
|
258
|
+
@models[0].name + '-' + @models[1].modelnum.to_s
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
# Return the number of codons in the codeml alignment
|
263
|
+
def num_codons
|
264
|
+
@header.scan(/seed used = \d+\n\s+\d+\s+\d+/).to_s.split[5].to_i/3
|
265
|
+
end
|
266
|
+
|
267
|
+
# Return the number of sequences in the codeml alignment
|
268
|
+
def num_sequences
|
269
|
+
@header.scan(/seed used = \d+\n\s+\d+\s+\d+/).to_s.split[4].to_i
|
270
|
+
end
|
271
|
+
|
272
|
+
# Return a PositiveSites (naive empirical bayesian) object
|
273
|
+
def nb_sites
|
274
|
+
PositiveSites.new("Naive Empirical Bayes (NEB)",@footer,num_codons)
|
275
|
+
end
|
276
|
+
|
277
|
+
# Return a PositiveSites Bayes Empirical Bayes (BEB) analysis
|
278
|
+
# Build a PositiveSites list from the footer section headed
# "Bayes Empirical Bayes (BEB)", sized by #num_codons.
def sites
  heading = "Bayes Empirical Bayes (BEB)"
  PositiveSites.new(heading, @footer, num_codons)
end
|
281
|
+
|
282
|
+
# If the number of models is two we can calculate whether the result is
|
283
|
+
# statistically significant, or not, at the 1% significance level. For
|
284
|
+
# example, for M7-8 the LRT statistic, or twice the log likelihood
|
285
|
+
# difference between the two compared models, may be compared against
|
286
|
+
# chi-square, with critical value 9.21 at the 1% significance level.
|
287
|
+
#
|
288
|
+
# Here we support a few likely combinations, M0-3, M1-2 and M7-8, used
|
289
|
+
# most often in literature. For other combinations, or a different
|
290
|
+
# significance level, you'll have to calculate chi-square yourself.
|
291
|
+
#
|
292
|
+
# Returns true or false. If no result is calculated this method
|
293
|
+
# raises an error
|
294
|
+
# Likelihood ratio test between the two parsed models at the 1% level.
#
# Twice the log likelihood difference (second model minus first) is
# compared against a fixed chi-square critical value chosen by the pair
# of model numbers. Supported pairs are M0-3, M1-2 and M7-8.
#
# Returns true or false. Raises ReportError when the report does not hold
# exactly two models or when the model pair is not supported.
def significant
  raise ReportError,"Wrong number of models #{@models.size}" if @models.size != 2
  null_lnL = @models[0].lnL
  null_num = @models[0].modelnum
  alt_lnL  = @models[1].lnL
  alt_num  = @models[1].modelnum
  # [model numbers, chi-square critical value at p=0.01]
  critical_values = [
    [[0,3], 13.2767],  # df=4
    [[1,2], 9.2103],   # df=2
    [[7,8], 9.2103]    # df=2
  ]
  entry = critical_values.find { |pair, _value| pair == [null_num, alt_num] }
  unless entry
    raise ReportError,"Significance calculation for #{descr} not supported"
  end
  2*(alt_lnL-null_lnL) > entry[1]
end
|
311
|
+
|
312
|
+
#:stopdoc:
|
313
|
+
|
314
|
+
# compatibility call for older interface (single models only)
|
315
|
+
# Delegates to the ReportSingle parser created for single-model reports.
# @single is only assigned on the single-model path of #initialize.
def tree_log_likelihood
  single_report = @single
  single_report.tree_log_likelihood
end
|
318
|
+
|
319
|
+
# compatibility call for older interface (single models only)
|
320
|
+
# Delegates to the ReportSingle parser created for single-model reports.
# @single is only assigned on the single-model path of #initialize.
def tree_length
  single_report = @single
  single_report.tree_length
end
|
323
|
+
|
324
|
+
# compatibility call for older interface (single models only)
|
325
|
+
# Delegates to the ReportSingle parser created for single-model reports.
# @single is only assigned on the single-model path of #initialize.
def alpha
  single_report = @single
  single_report.alpha
end
|
328
|
+
|
329
|
+
# compatibility call for older interface (single models only)
|
330
|
+
# Delegates to the ReportSingle parser created for single-model reports.
# @single is only assigned on the single-model path of #initialize.
def tree
  single_report = @single
  single_report.tree
end
|
333
|
+
|
334
|
+
#:startdoc:
|
335
|
+
|
336
|
+
end # Report
|
337
|
+
|
338
|
+
# ReportSingle is a simpler parser for a codeml report
|
339
|
+
# containing a single run. This is retained for
|
340
|
+
# backward compatibility mostly.
|
341
|
+
#
|
342
|
+
# The results of a single model (old style report parser)
|
343
|
+
#
|
344
|
+
#--
|
345
|
+
# >> buf = BioTestFile.read('paml/codeml/output.txt')
|
346
|
+
#++
|
347
|
+
#
|
348
|
+
# >> single = Bio::PAML::Codeml::Report.new(buf)
|
349
|
+
#
|
350
|
+
# >> single.tree_log_likelihood
|
351
|
+
# => -1817.465211
|
352
|
+
#
|
353
|
+
# >> single.tree_length
|
354
|
+
# => 0.77902
|
355
|
+
#
|
356
|
+
# >> single.alpha
|
357
|
+
# => 0.58871
|
358
|
+
#
|
359
|
+
# >> single.tree
|
360
|
+
# => "(((rabbit: 0.082889, rat: 0.187866): 0.038008, human: 0.055050): 0.033639, goat-cow: 0.096992, marsupial: 0.284574);"
|
361
|
+
#
|
362
|
+
class ReportSingle < Bio::PAML::Common::Report
|
363
|
+
|
37
364
|
attr_reader :tree_log_likelihood, :tree_length, :alpha, :tree
|
38
365
|
|
366
|
+
# Do not use
|
39
367
|
def initialize(codeml_report)
|
40
368
|
@tree_log_likelihood = pull_tree_log_likelihood(codeml_report)
|
41
369
|
@tree_length = pull_tree_length(codeml_report)
|
@@ -45,23 +373,253 @@ module Bio::PAML
|
|
45
373
|
|
46
374
|
private
|
47
375
|
|
376
|
+
# Do not use
|
48
377
|
# Extract the number that follows "lnL(...):" in +text+ as a Float.
# Yields 0.0 when the pattern is not present (nil.to_f).
def pull_tree_log_likelihood(text)
  found = text.match(/lnL\(.+\):\s+(-?\d+(\.\d+)?)/)
  (found && found[1]).to_f
end
|
51
380
|
|
52
|
-
|
381
|
+
# Do not use
|
53
382
|
# Extract the number that follows "tree length =" in +text+ as a Float.
# Yields 0.0 when the pattern is not present (nil.to_f).
def pull_tree_length(text)
  found = text.match(/tree length\s+=\s+ (-?\d+(\.\d+)?)/)
  (found && found[1]).to_f
end
|
56
385
|
|
386
|
+
# Do not use
|
57
387
|
# Extract the number that follows "alpha ... =" in +text+ as a Float.
# Yields 0.0 when the pattern is not present (nil.to_f).
def pull_alpha(text)
  found = text.match(/alpha .+ =\s+(-?\d+(\.\d+)?)/)
  (found && found[1]).to_f
end
|
60
390
|
|
391
|
+
# Do not use
|
61
392
|
# Return the single line of +text+ that is followed by a blank line and
# then "Detailed", or nil when there is no such line.
def pull_tree(text)
  found = text.match(/([^\n]+)\n\nDetailed/m)
  found && found[1]
end
|
64
395
|
|
65
|
-
end #
|
66
|
-
|
67
|
-
|
396
|
+
end # ReportSingle
|
397
|
+
|
398
|
+
# Model class contains one of the models of a codeml run (e.g. M0)
|
399
|
+
# which is used as a test hypothesis for positive selection. This
|
400
|
+
# class is used by Codeml::Report.
|
401
|
+
class Model

  # Create a model from its section of the codeml result (text buffer).
  # The buffer is stored as-is; every accessor below parses it on demand.
  def initialize buf
    @buf = buf
  end

  # Return the model number, read from the first character of the buffer.
  def modelnum
    @buf[0..0].to_i
  end

  # Return the model name, e.g. 'M0' or 'M7'
  def name
    "M#{modelnum}"
  end

  # Return codeml log likelihood of model (0.0 when no "lnL(...):" line
  # is present)
  def lnL
    @buf[/lnL\(.+\):\s+(-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml omega of model (0.0 when no "omega (dN/dS) =" line is
  # present)
  def omega
    @buf[/omega \(dN\/dS\)\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
  end

  alias dN_dS omega

  # Return codeml kappa of model, or nil when the buffer does not mention
  # kappa
  def kappa
    return nil if @buf !~ /kappa/
    @buf[/kappa \(ts\/tv\)\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml alpha of model, or nil when the buffer does not mention
  # alpha
  def alpha
    return nil if @buf !~ /alpha/
    @buf[/alpha .+ =\s+(-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml tree length (0.0 when no "tree length =" line is present)
  def tree_length
    @buf[/tree length\s+=\s+ (-?\d+(\.\d+)?)/,1].to_f
  end

  # Return codeml tree: the line followed by a blank line and "Detailed",
  # or nil when there is none
  def tree
    @buf[/([^\n]+)\n\nDetailed/m,1]
  end

  # Return classes when available. For M3 it parses
  #
  #   dN/dS (w) for site classes (K=3)
  #   p:   0.56413  0.35613  0.07974
  #   w:   0.00928  1.98252 23.44160
  #
  # and turns it into an array of Hash
  #
  #   >> m3.classes[0]
  #   => {:w=>0.00928, :p=>0.56413}
  #
  # Returns nil when the buffer does not mention "classes". The values are
  # read straight from the first "p:" and "w:" lines instead of going
  # through the string form of an Array, which differs between Ruby
  # versions and escapes whitespace such as tabs.
  #
  # NOTE(review): as before, at most the first three values of each line
  # are used - confirm whether runs with K > 3 should return every class.
  def classes
    return nil if @buf !~ /classes/
    probs = leading_floats(@buf[/\np:(.*?)\n/,1])
    ws    = leading_floats(@buf[/\nw:(.*?)\n/,1])
    ret = []
    probs.each_with_index do | prob, i |
      ret.push :p => prob, :w => ws[i]
    end
    ret
  end

  # Return the model information as a String
  def to_s
    @buf
  end

  private

  # Convert the first three whitespace separated fields of +text+ to
  # Floats. A nil +text+ (line not found) gives an empty Array.
  def leading_floats(text)
    text.to_s.split[0,3].map { |f| f.to_f }
  end
end
|
480
|
+
|
481
|
+
# A record of codon sites, across the sequences in the alignment,
|
482
|
+
# showing evidence of positive selection.
|
483
|
+
#
|
484
|
+
# This class is used for storing both codeml's full Bayesian and naive
|
485
|
+
# Bayesian analysis
|
486
|
+
class PositiveSite
  # Values taken from one parsed report row, in column order.
  attr_reader :position, :aaref, :probability, :omega

  # +fields+ holds the whitespace separated columns of one site row:
  # position, reference amino acid, probability and omega. Trailing
  # markers on a number (e.g. "0.988*") are dropped by String#to_f.
  def initialize fields
    @position, @aaref, @probability, @omega =
      fields[0].to_i, fields[1].to_s, fields[2].to_f, fields[3].to_f
  end

  # Return dN/dS (or omega) for this codon
  def dN_dS
    omega
  end

  # Short names matching the column labels used by codeml.
  alias w dN_dS
  alias p probability

  # Return contents as Array - useful for printing
  def to_a
    [ @position, @aaref, @probability, @omega ]
  end
end
|
513
|
+
|
514
|
+
# List for the positive selection sites. PAML returns:
|
515
|
+
#
|
516
|
+
# Naive Empirical Bayes (NEB) analysis
|
517
|
+
# Positively selected sites (*: P>95%; **: P>99%)
|
518
|
+
# (amino acids refer to 1st sequence: PITG_23265T0)
|
519
|
+
#
|
520
|
+
# Pr(w>1) post mean +- SE for w
|
521
|
+
#
|
522
|
+
# 17 I 0.988* 3.293
|
523
|
+
# 18 H 1.000** 17.975
|
524
|
+
# 23 F 0.991** 6.283
|
525
|
+
# (...)
|
526
|
+
# 131 V 1.000** 22.797
|
527
|
+
# 132 R 1.000** 10.800
|
528
|
+
# (newline)
|
529
|
+
#
|
530
|
+
# these can be accessed using normal iterators. Also special
|
531
|
+
# methods are available for presenting this data
|
532
|
+
#
|
533
|
+
class PositiveSites < Array

  # The heading text that was searched for (e.g. the NEB or BEB title)
  attr_reader :descr

  # Number of lines between the heading line and the first site row
  HEADER_LINES = 6

  # Parse the block of +buf+ that starts at the first line containing
  # +search+ and collect one PositiveSite per row, stopping at the first
  # blank line. +num_codons+ is the alignment length used by the graph
  # methods.
  #
  # Raises ReportError when +buf+ is nil, when +search+ does not occur in
  # it, or when the buffer ends before the first site row.
  def initialize search, buf, num_codons
    @num_codons = num_codons
    # A nil buffer is reported as a ReportError instead of a NoMethodError
    if buf.nil? || buf.index(search).nil?
      raise ReportError,"No NB sites found for #{search}"
    end
    # Set description of this class
    @descr = search
    lines = buf.split("\n")
    # find location of 'search'
    start = 0
    lines.each_with_index do | line, i |
      if line.index(search)
        start = i
        break
      end
    end
    first_site = start + HEADER_LINES
    raise ReportError,"Out of bound error for <#{buf}>" if lines[first_site].nil?
    lines[first_site..-1].each do | line |
      break if line.strip == ""
      push PositiveSite.new(line.split)
    end
    # Keep the heading, the site rows and the two lines that follow them
    @buf = lines[start..first_site+size+1].join("\n")
  end

  # Generate a graph - which is a simple string pointing out the positions
  # showing evidence of positive selection pressure.
  #
  #   >> c.sites.graph[0..32]
  #   => "                **    *       * *"
  #
  def graph
    graph_to_s(lambda { |site| "*" })
  end

  # Generate a graph - which is a simple string pointing out the positions
  # showing evidence of positive selection pressure, with dN/dS values
  # (high values are an asterisk *)
  #
  #   >> c.sites.graph_omega[0..32]
  #   => "                24    3       3 2"
  #
  def graph_omega
    graph_to_s(lambda { |site|
      # Only values that print as a single digit are shown as a number;
      # anything from 10 upwards would take two columns and shift the
      # rest of the graph, so it is drawn as an asterisk.
      site.omega.abs < 10.0 ? site.omega.to_i.to_s : "*"
    })
  end

  # Graph of amino acids of first sequence at locations
  def graph_seq
    graph_to_s(lambda { |site| site.aaref })
  end

  # Return the positive selection information as a String
  def to_s
    @buf
  end

  # :nodoc:
  # Creates a graph of sites, adjusting for gaps. +func+ is called with
  # each site and returns the symbol to draw at that site's (1-based)
  # position; +fill+ is used to fill out the gaps. The result is padded
  # with +fill+ up to the number of codons.
  def graph_to_s func, fill=' '
    ret = ""
    pos = 0
    each do | site |
      symbol = func.call(site)
      gapsize = site.position-pos-1
      ret += fill*gapsize + symbol
      pos = site.position
    end
    # +pos+ columns have been written so far; pad the remainder so that
    # the graph is exactly @num_codons wide.
    gapsize = @num_codons - pos
    ret += fill*gapsize if gapsize > 0
    ret
  end
end
|
618
|
+
|
619
|
+
# Supporting error class
|
620
|
+
# Raised by the codeml report classes when the report text cannot be
# parsed or a requested calculation is not supported.
class ReportError < RuntimeError; end
|
622
|
+
|
623
|
+
|
624
|
+
end # Codeml
|
625
|
+
end # Bio::PAML
|