bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,50 @@
1
+ #
2
+ # bio/util/color_scheme/nucleotide.rb - Color codings for nucleotides
3
+ #
4
+ # Copyright:: Copyright (C) 2005 Trevor Wennblom <trevor@corevx.com>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: nucleotide.rb,v 1.2 2005/12/13 14:58:07 trevor Exp $
8
+ #
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+ #
29
+
30
+ require 'bio/util/color_scheme'
31
+
32
+ module Bio::ColorScheme
33
+ class Nucleotide < Simple
34
+
35
+ #########
36
+ protected
37
+ #########
38
+
39
+ @colors = {
40
+ 'A' => '64F73F',
41
+ 'C' => 'FFB340',
42
+ 'G' => 'EB413C',
43
+ 'T' => '3C88EE',
44
+ 'U' => '3C88EE',
45
+ }
46
+ @colors.default = 'FFFFFF' # return white by default
47
+
48
+ end
49
+ NA = Nuc = Nucleotide
50
+ end
@@ -0,0 +1,78 @@
1
+ #
2
+ # bio/util/color_scheme/strand.rb - Color codings for strand propensity
3
+ #
4
+ # Copyright:: Copyright (C) 2005 Trevor Wennblom <trevor@corevx.com>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: strand.rb,v 1.3 2005/12/13 14:58:07 trevor Exp $
8
+ #
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+ #
29
+
30
+ require 'bio/util/color_scheme'
31
+
32
+ module Bio::ColorScheme
33
+ class Strand < Score
34
+
35
+ #########
36
+ protected
37
+ #########
38
+
39
+ def self.score_to_rgb_hex(score, min, max)
40
+ percent = score_to_percent(score, min, max)
41
+ rgb_percent_to_hex(percent, percent, 1.0-percent)
42
+ end
43
+
44
+ @colors = {}
45
+ @scores = {
46
+ 'A' => 0.83,
47
+ 'C' => 1.19,
48
+ 'D' => 0.54,
49
+ 'E' => 0.37,
50
+ 'F' => 1.38,
51
+ 'G' => 0.75,
52
+ 'H' => 0.87,
53
+ 'I' => 1.6,
54
+ 'K' => 0.74,
55
+ 'L' => 1.3,
56
+ 'M' => 1.05,
57
+ 'N' => 0.89,
58
+ 'P' => 0.55,
59
+ 'Q' => 1.1,
60
+ 'R' => 0.93,
61
+ 'S' => 0.75,
62
+ 'T' => 1.19,
63
+ 'U' => 0.0,
64
+ 'V' => 1.7,
65
+ 'W' => 1.37,
66
+ 'Y' => 1.47,
67
+
68
+ 'B' => 0.72,
69
+ 'X' => 1.0,
70
+ 'Z' => 0.74,
71
+ }
72
+ @min = 0.37
73
+ @max = 1.7
74
+ @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
75
+ @colors.default = 'FFFFFF' # return white by default
76
+
77
+ end
78
+ end
@@ -0,0 +1,69 @@
1
+ #
2
+ # bio/util/color_scheme/taylor.rb - Taylor color codings for amino acids
3
+ #
4
+ # Copyright:: Copyright (C) 2005 Trevor Wennblom <trevor@corevx.com>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: taylor.rb,v 1.2 2005/12/13 14:58:07 trevor Exp $
8
+ #
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+ #
29
+
30
+ require 'bio/util/color_scheme'
31
+
32
+ module Bio::ColorScheme
33
+ class Taylor < Simple
34
+
35
+ #########
36
+ protected
37
+ #########
38
+
39
+ @colors = {
40
+ 'A' => 'CCFF00',
41
+ 'C' => 'FFFF00',
42
+ 'D' => 'FF0000',
43
+ 'E' => 'FF0066',
44
+ 'F' => '00FF66',
45
+ 'G' => 'FF9900',
46
+ 'H' => '0066FF',
47
+ 'I' => '66FF00',
48
+ 'K' => '6600FF',
49
+ 'L' => '33FF00',
50
+ 'M' => '00FF00',
51
+ 'N' => 'CC00FF',
52
+ 'P' => 'FFCC00',
53
+ 'Q' => 'FF00CC',
54
+ 'R' => '0000FF',
55
+ 'S' => 'FF3300',
56
+ 'T' => 'FF6600',
57
+ 'U' => 'FFFFFF',
58
+ 'V' => '99FF00',
59
+ 'W' => '00CCFF',
60
+ 'Y' => '00FFCC',
61
+
62
+ 'B' => 'FFFFFF',
63
+ 'X' => 'FFFFFF',
64
+ 'Z' => 'FFFFFF',
65
+ }
66
+ @colors.default = 'FFFFFF' # return white by default
67
+
68
+ end
69
+ end
@@ -0,0 +1,78 @@
1
+ #
2
+ # bio/util/color_scheme/turn.rb - Color codings for turn propensity
3
+ #
4
+ # Copyright:: Copyright (C) 2005 Trevor Wennblom <trevor@corevx.com>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: turn.rb,v 1.2 2005/12/13 14:58:07 trevor Exp $
8
+ #
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+ #
29
+
30
+ require 'bio/util/color_scheme'
31
+
32
+ module Bio::ColorScheme
33
+ class Turn < Score
34
+
35
+ #########
36
+ protected
37
+ #########
38
+
39
+ def self.score_to_rgb_hex(score, min, max)
40
+ percent = score_to_percent(score, min, max)
41
+ rgb_percent_to_hex(percent, 1.0-percent, 1.0-percent)
42
+ end
43
+
44
+ @colors = {}
45
+ @scores = {
46
+ 'A' => 0.66,
47
+ 'C' => 1.19,
48
+ 'D' => 1.46,
49
+ 'E' => 0.74,
50
+ 'F' => 0.6,
51
+ 'G' => 1.56,
52
+ 'H' => 0.95,
53
+ 'I' => 0.47,
54
+ 'K' => 1.01,
55
+ 'L' => 0.59,
56
+ 'M' => 0.6,
57
+ 'N' => 1.56,
58
+ 'P' => 1.52,
59
+ 'Q' => 0.98,
60
+ 'R' => 0.95,
61
+ 'S' => 1.43,
62
+ 'T' => 0.96,
63
+ 'U' => 0,
64
+ 'V' => 0.5,
65
+ 'W' => 0.96,
66
+ 'Y' => 1.14,
67
+
68
+ 'B' => 1.51,
69
+ 'X' => 1.0,
70
+ 'Z' => 0.86,
71
+ }
72
+ @min = 0.47
73
+ @max = 1.56
74
+ @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) }
75
+ @colors.default = 'FFFFFF' # return white by default
76
+
77
+ end
78
+ end
@@ -0,0 +1,69 @@
1
+ #
2
+ # bio/util/color_scheme/zappo.rb - Zappo color codings for amino acids
3
+ #
4
+ # Copyright:: Copyright (C) 2005 Trevor Wennblom <trevor@corevx.com>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: zappo.rb,v 1.2 2005/12/13 14:58:07 trevor Exp $
8
+ #
9
+ #
10
+ #--
11
+ #
12
+ # This library is free software; you can redistribute it and/or
13
+ # modify it under the terms of the GNU Lesser General Public
14
+ # License as published by the Free Software Foundation; either
15
+ # version 2 of the License, or (at your option) any later version.
16
+ #
17
+ # This library is distributed in the hope that it will be useful,
18
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
+ # Lesser General Public License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Lesser General Public
23
+ # License along with this library; if not, write to the Free Software
24
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
+ #
26
+ #++
27
+ #
28
+ #
29
+
30
+ require 'bio/util/color_scheme'
31
+
32
+ module Bio::ColorScheme
33
+ class Zappo < Simple
34
+
35
+ #########
36
+ protected
37
+ #########
38
+
39
+ @colors = {
40
+ 'A' => 'FFAFAF',
41
+ 'C' => 'FFFF00',
42
+ 'D' => 'FF0000',
43
+ 'E' => 'FF0000',
44
+ 'F' => 'FFC800',
45
+ 'G' => 'FF00FF',
46
+ 'H' => 'FF0000',
47
+ 'I' => 'FFAFAF',
48
+ 'K' => '6464FF',
49
+ 'L' => 'FFAFAF',
50
+ 'M' => 'FFAFAF',
51
+ 'N' => '00FF00',
52
+ 'P' => 'FF00FF',
53
+ 'Q' => '00FF00',
54
+ 'R' => '6464FF',
55
+ 'S' => '00FF00',
56
+ 'T' => '00FF00',
57
+ 'U' => 'FFFFFF',
58
+ 'V' => 'FFAFAF',
59
+ 'W' => 'FFC800',
60
+ 'Y' => 'FFC800',
61
+
62
+ 'B' => 'FFFFFF',
63
+ 'X' => 'FFFFFF',
64
+ 'Z' => 'FFFFFF',
65
+ }
66
+ @colors.default = 'FFFFFF' # return white by default
67
+
68
+ end
69
+ end
@@ -0,0 +1,337 @@
1
+ module Bio
2
+
3
+ #
4
+ # bio/util/contingency_table.rb - Statistical contingency table analysis for aligned sequences
5
+ #
6
+ # Copyright:: Copyright (C) 2005 Trevor Wennblom <trevor@corevx.com>
7
+ # License:: LGPL
8
+ #
9
+ # $Id: contingency_table.rb,v 1.2 2005/12/13 14:58:37 trevor Exp $
10
+ #
11
+ #
12
+ #--
13
+ #
14
+ # This library is free software; you can redistribute it and/or
15
+ # modify it under the terms of the GNU Lesser General Public
16
+ # License as published by the Free Software Foundation; either
17
+ # version 2 of the License, or (at your option) any later version.
18
+ #
19
+ # This library is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ # Lesser General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU Lesser General Public
25
+ # License along with this library; if not, write to the Free Software
26
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27
+ #
28
+ #++
29
+ #
30
+ #
31
+
32
+ =begin rdoc
33
+ bio/util/contingency_table.rb - Statistical contingency table analysis for aligned sequences
34
+
35
+ == Synopsis
36
+
37
+ The Bio::ContingencyTable class provides basic statistical contingency table
38
+ analysis for two positions within aligned sequences.
39
+
40
+ When ContingencyTable is instantiated the set of characters in the aligned sequences may be
41
+ passed to it as an array. This is important since it uses these characters
42
+ to create the table's rows and columns. If this array is not passed it will
43
+ use its default of an amino acid and nucleotide alphabet in lowercase along with the
44
+ clustal spacer '-'.
45
+
46
+ To get data from the table the most used functions will be chi_square and contingency_coefficient:
47
+ ctable = Bio::ContingencyTable.new()
48
+ ctable.table['a']['t'] += 1
49
+ # .. put more values into the table
50
+ puts ctable.chi_square
51
+ puts ctable.contingency_coefficient # between 0.0 and 1.0
52
+
53
+ The contingency_coefficient represents the degree of correlation of change between two
54
+ sequence positions in a multiple-sequence alignment. 0.0 indicates no correlation, 1.0 is the
55
+ maximum correlation.
56
+
57
+
58
+ == Further Reading
59
+
60
+ * http://en.wikipedia.org/wiki/Contingency_table
61
+ * http://www.physics.csbsju.edu/stats/exact.details.html
62
+ * Numerical Recipes in C by Press, Flannery, Teukolsky, and Vetterling
63
+
64
+
65
+ == Usage
66
+
67
+ What follows is an example of ContingencyTable in typical usage analyzing results from a clustal alignment.
68
+
69
+ require 'bio'
70
+ require 'bio/util/contingency_table'
71
+
72
+ seqs = {}
73
+ max_length = 0
74
+ Bio::ClustalW::Report.new( IO.read('sample.aln') ).to_a.each do |entry|
75
+ data = entry.data.strip
76
+ seqs[entry.definition] = data.downcase
77
+ max_length = data.size if max_length == 0
78
+ raise "Aligned sequences must be the same length!" unless data.size == max_length
79
+ end
80
+
81
+ VERBOSE = true
82
+ puts "i\tj\tchi_square\tcontingency_coefficient" if VERBOSE
83
+ correlations = {}
84
+
85
+ 0.upto(max_length - 1) do |i|
86
+ (i+1).upto(max_length - 1) do |j|
87
+ ctable = Bio::ContingencyTable.new()
88
+ seqs.each_value { |seq| ctable.table[ seq[i].chr ][ seq[j].chr ] += 1 }
89
+
90
+ chi_square = ctable.chi_square
91
+ contingency_coefficient = ctable.contingency_coefficient
92
+ puts [(i+1), (j+1), chi_square, contingency_coefficient].join("\t") if VERBOSE
93
+
94
+ correlations["#{i+1},#{j+1}"] = contingency_coefficient
95
+ correlations["#{j+1},#{i+1}"] = contingency_coefficient # Both ways are accurate
96
+ end
97
+ end
98
+
99
+ require 'yaml'
100
+ File.open('results.yml', 'a+') { |f| f.puts correlations.to_yaml }
101
+
102
+
103
+ == Tutorial
104
+
105
+ ContingencyTable returns the statistical significance of change between two positions in an alignment.
106
+ If you would like to see how every possible combination of positions in your alignment compares to one another
107
+ you must set this up yourself. Hopefully the provided examples will help you get started without
108
+ too much trouble.
109
+
110
+ def lite_example(sequences, max_length, characters)
111
+
112
+ %w{i j chi_square contingency_coefficient}.each { |x| print x.ljust(12) }
113
+ puts
114
+
115
+ 0.upto(max_length - 1) do |i|
116
+ (i+1).upto(max_length - 1) do |j|
117
+ ctable = Bio::ContingencyTable.new( characters )
118
+ sequences.each do |seq|
119
+ i_char = seq[i].chr
120
+ j_char = seq[j].chr
121
+ ctable.table[i_char][j_char] += 1
122
+ end
123
+ chi_square = ctable.chi_square
124
+ contingency_coefficient = ctable.contingency_coefficient
125
+ [(i+1), (j+1), chi_square, contingency_coefficient].each { |x| print x.to_s.ljust(12) }
126
+ puts
127
+ end
128
+ end
129
+
130
+ end
131
+
132
+ allowed_letters = Array.new
133
+ allowed_letters = 'abcdefghijk'.split('')
134
+
135
+ seqs = Array.new
136
+ seqs << 'abcde'
137
+ seqs << 'abcde'
138
+ seqs << 'aacje'
139
+ seqs << 'aacae'
140
+
141
+ length_of_every_sequence = seqs[0].size # 5 letters long
142
+
143
+ lite_example(seqs, length_of_every_sequence, allowed_letters)
144
+
145
+
146
+ Producing the following results:
147
+
148
+ i j chi_square contingency_coefficient
149
+ 1 2 0.0 0.0
150
+ 1 3 0.0 0.0
151
+ 1 4 0.0 0.0
152
+ 1 5 0.0 0.0
153
+ 2 3 0.0 0.0
154
+ 2 4 4.0 0.707106781186548
155
+ 2 5 0.0 0.0
156
+ 3 4 0.0 0.0
157
+ 3 5 0.0 0.0
158
+ 4 5 0.0 0.0
159
+
160
+ The position i=2 and j=4 has a high contingency coefficient indicating that the changes at these
161
+ positions are related. Note that i and j are arbitrary, this could be represented as i=4 and j=2
162
+ since they both refer to position two and position four in the alignment. Here are some more examples:
163
+
164
+ seqs = Array.new
165
+ seqs << 'abcde'
166
+ seqs << 'abcde'
167
+ seqs << 'aacje'
168
+ seqs << 'aacae'
169
+ seqs << 'akcfe'
170
+ seqs << 'akcfe'
171
+
172
+ length_of_every_sequence = seqs[0].size # 5 letters long
173
+
174
+ lite_example(seqs, length_of_every_sequence, allowed_letters)
175
+
176
+
177
+ Results:
178
+
179
+ i j chi_square contingency_coefficient
180
+ 1 2 0.0 0.0
181
+ 1 3 0.0 0.0
182
+ 1 4 0.0 0.0
183
+ 1 5 0.0 0.0
184
+ 2 3 0.0 0.0
185
+ 2 4 12.0 0.816496580927726
186
+ 2 5 0.0 0.0
187
+ 3 4 0.0 0.0
188
+ 3 5 0.0 0.0
189
+ 4 5 0.0 0.0
190
+
191
+ Here we can see that the strength of the correlation of change has increased when more data is added with correlated changes at the same positions.
192
+
193
+ seqs = Array.new
194
+ seqs << 'abcde'
195
+ seqs << 'abcde'
196
+ seqs << 'kacje' # changed first letter
197
+ seqs << 'aacae'
198
+ seqs << 'akcfa' # changed last letter
199
+ seqs << 'akcfe'
200
+
201
+ length_of_every_sequence = seqs[0].size # 5 letters long
202
+
203
+ lite_example(seqs, length_of_every_sequence, allowed_letters)
204
+
205
+
206
+ Results:
207
+
208
+ i j chi_square contingency_coefficient
209
+ 1 2 2.4 0.534522483824849
210
+ 1 3 0.0 0.0
211
+ 1 4 6.0 0.707106781186548
212
+ 1 5 0.24 0.196116135138184
213
+ 2 3 0.0 0.0
214
+ 2 4 12.0 0.816496580927726
215
+ 2 5 2.4 0.534522483824849
216
+ 3 4 0.0 0.0
217
+ 3 5 0.0 0.0
218
+ 4 5 2.4 0.534522483824849
219
+
220
+ With random changes it becomes more difficult to identify correlated changes, yet positions two
221
+ and four still have the highest correlation as indicated by the contingency coefficient. The
222
+ best way to improve the accuracy of your results, as is often the case with statistics, is to
223
+ increase the sample size.
224
+
225
+
226
+ == A Note on Efficiency
227
+
228
+ ContingencyTable is slow. It involves many calculations for even a seemingly small five-string data set.
229
+ Even worse, it's very dependent on matrix traversal, and this is done with two dimensional hashes which
230
+ dashes any hope of decent speed.
231
+
232
+ Finally, half of the matrix is redundant and positions could be summed with their companion position to reduce
233
+ calculations. For example the positions (5,2) and (2,5) could both have their values added together and
234
+ just stored in (2,5) while (5,2) could be an illegal position. Also, positions (1,1), (2,2), (3,3), etc.
235
+ will never be used.
236
+
237
+ The purpose of this package is flexibility and education. The code is short and to the point with the
238
+ aim of achieving that purpose. If the BioRuby project moves towards C extensions in the future a
239
+ professional caliber version will likely be created.
240
+
241
+
242
+ == Author
243
+ Trevor Wennblom <trevor@corevx.com>
244
+
245
+
246
+ == Copyright
247
+ Copyright (C) 2005 Trevor Wennblom
248
+ Licensed under the same terms as BioRuby.
249
+
250
+ =end
251
+
252
+ class ContingencyTable
253
+ # Since we're making this math-notation friendly here is the layout of @table:
254
+ # * @table[row][column]
255
+ # * @table[i][j]
256
+ # * @table[y][x]
257
+ attr_accessor :table
258
+ attr_reader :characters
259
+
260
+ # Create a ContingencyTable that has characters_in_sequences.size rows and
261
+ # characters_in_sequences.size columns for each row
262
+ def initialize(characters_in_sequences = nil)
263
+ @characters = ( characters_in_sequences or %w{a c d e f g h i k l m n p q r s t v w y - x u} )
264
+ tmp = Hash[*@characters.collect { |v| [v, 0] }.flatten]
265
+ @table = Hash[*@characters.collect { |v| [v, tmp.dup] }.flatten]
266
+ end
267
+
268
+ # Report the sum of all values in a given row
269
+ def row_sum(i)
270
+ total = 0
271
+ @table[i].each { |k, v| total += v }
272
+ total
273
+ end
274
+
275
+ # Report the sum of all values in a given column
276
+ def column_sum(j)
277
+ total = 0
278
+ @table.each { |row_key, column| total += column[j] }
279
+ total
280
+ end
281
+
282
+ # Report the sum of all values in all columns.
283
+ #
284
+ # * This is the same thing as asking for the sum of all values in the table.
285
+ #
286
+ def column_sum_all
287
+ total = 0
288
+ @characters.each { |j| total += column_sum(j) }
289
+ total
290
+ end
291
+
292
+ # Report the sum of all values in all rows.
293
+ #
294
+ # * This is the same thing as asking for the sum of all values in the table.
295
+ #
296
+ def row_sum_all
297
+ total = 0
298
+ @characters.each { |i| total += row_sum(i) }
299
+ total
300
+ end
301
+ alias table_sum_all row_sum_all
302
+
303
+ #
304
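+ # Expected count for cell (i, j): e_ij = (r_i / N) * c_j, where r_i is the sum of row i, c_j is the sum of column j and N is the sum of the whole table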
305
+ #
306
+ def expected(i, j)
307
+ (row_sum(i).to_f / table_sum_all) * column_sum(j)
308
+ end
309
+
310
+ # Report the chi square of the entire table
311
+ def chi_square
312
+ total = 0
313
+ c = @characters
314
+ max = c.size - 1
315
+ @characters.each do |i| # Loop through every row in the ContingencyTable
316
+ @characters.each do |j| # Loop through every column in the ContingencyTable
317
+ total += chi_square_element(i, j)
318
+ end
319
+ end
320
+ total
321
+ end
322
+
323
+ # Report the chi square relation of two elements in the table
324
+ def chi_square_element(i, j)
325
+ eij = expected(i, j)
326
+ return 0 if eij == 0
327
+ ( @table[i][j] - eij )**2 / eij
328
+ end
329
+
330
+ # Report the contingency coefficient of the table
331
+ def contingency_coefficient
332
+ c_s = chi_square
333
+ Math.sqrt(c_s / (table_sum_all + c_s) )
334
+ end
335
+
336
+ end
337
+ end