bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,51 @@
1
+ #
2
+ # bio/db/kegg/brite.rb - KEGG/BRITE database class
3
+ #
4
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: brite.rb,v 0.6 2005/09/08 01:22:11 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
module Bio

class KEGG

# Entry class for the KEGG/BRITE database.
#
# The class only wires the entry text into the KEGGDB parent class; it
# defines no field accessors of its own yet.
class BRITE < KEGGDB

  # Separator string between two entries ("///" on a line of its own).
  RS = "\n///\n"
  # Same string object as RS, under a second name.
  DELIMITER = RS
  # Value handed to the KEGGDB constructor as its second argument.
  TAGSIZE = 12

  # entry:: text of a single BRITE entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # Fields for which no accessor method exists so far:
  #
  #   ENTRY
  #   DEFINITION
  #   RELATION
  #   FACTORS
  #   INTERACTION
  #   SOURCE
  #   REFERENCE

end

end

end
51
+
@@ -0,0 +1,88 @@
1
+ #
2
+ # bio/db/kegg/cell.rb - KEGG/CELL database class
3
+ #
4
+ # Copyright (C) 2001 KAWASHIMA Shuichi <s@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: cell.rb,v 1.7 2005/09/08 01:22:11 k Exp $
21
+ #
22
+
23
+ require "bio/db"
24
+
25
module Bio

class KEGG

# Entry class for the KEGG/CELL database.
#
# Every accessor delegates to field_fetch, which is inherited through
# KEGGDB (its definition is not part of this file).
class CELL < KEGGDB

  # Separator string between two entries ("///" on a line of its own).
  RS = "\n///\n"
  # Same string object as RS, under a second name.
  DELIMITER = RS
  # Value handed to the KEGGDB constructor as its second argument.
  TAGSIZE = 12

  # entry:: text of a single CELL entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ENTRY field.
  def entry_id
    field_fetch('ENTRY')
  end

  # DEFINITION field.
  def definition
    field_fetch('DEFINITION')
  end

  # ORGANISM field; also available as #organism.
  def org
    field_fetch('ORGANISM')
  end
  alias organism org

  # MOTHER field.
  def mother
    field_fetch('MOTHER')
  end

  # DAUGHTER field with blanks removed, split on commas.
  def daughter
    comma_separated_field('DAUGHTER')
  end

  # SISTER field.
  def sister
    field_fetch('SISTER')
  end

  # CELL_FATE field with blanks removed, split on commas.
  def fate
    comma_separated_field('CELL_FATE')
  end

  # CONTACT field with blanks removed, split on commas.
  def contact
    comma_separated_field('CONTACT')
  end

  # EXPRESSION field.
  def expression
    field_fetch('EXPRESSION')
  end

  # FIGURE field.
  def fig
    field_fetch('FIGURE')
  end

  # REFERENCE field.
  def ref
    field_fetch('REFERENCE')
  end

  private

  # Fetches the field +tag+, deletes every space character and splits the
  # remainder at each comma.
  def comma_separated_field(tag)
    compact = field_fetch(tag).gsub(/ /, '')
    compact.split(/,/)
  end

end

end

end
88
+
@@ -0,0 +1,130 @@
1
+ #
2
+ # bio/db/kegg/compound.rb - KEGG COMPOUND database class
3
+ #
4
+ # Copyright (C) 2001, 2002, 2004 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: compound.rb,v 0.11 2005/09/08 01:22:11 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
module Bio

class KEGG

# Entry class for the KEGG COMPOUND database.
#
# Parsed values are cached in @data, keyed by the field tag. The helpers
# fetch, get, field_fetch and lines_fetch as well as @data itself come from
# the KEGGDB parent class (not part of this file).
class COMPOUND < KEGGDB

  # Separator string between two entries ("///" on a line of its own).
  RS = "\n///\n"
  # Same string object as RS, under a second name.
  DELIMITER = RS
  # Value handed to the KEGGDB constructor as its second argument.
  TAGSIZE = 12

  # entry:: text of a single COMPOUND entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ENTRY -- first whitespace-separated token of the field (cached).
  def entry_id
    @data['ENTRY'] ||= fetch('ENTRY').split(/\s+/).first
  end

  # NAME -- all names, as returned by lines_fetch.
  def names
    lines_fetch('NAME')
  end

  # NAME -- element 0 of #names.
  def name
    names[0]
  end

  # FORMULA
  def formula
    field_fetch('FORMULA')
  end

  # MASS -- converted with to_f.
  def mass
    field_fetch('MASS').to_f
  end

  # REACTION -- field split on whitespace (cached).
  def reactions
    @data['REACTION'] ||= fetch('REACTION').split(/\s+/)
  end

  # RPAIR -- field split on whitespace (cached).
  def rpairs
    @data['RPAIR'] ||= fetch('RPAIR').split(/\s+/)
  end

  # PATHWAY
  def pathways
    lines_fetch('PATHWAY')
  end

  # ENZYME -- list of enzyme tokens (cached).
  #
  # When the field contains an opening parenthesis (old format) each item
  # is a "token (token)" pair; otherwise each whitespace-separated token is
  # an item.
  def enzymes
    @data['ENZYME'] ||= begin
      text = fetch('ENZYME')
      if /\(/.match(text) # old version
        text.scan(/\S+ \(\S+\)/)
      else
        text.scan(/\S+/)
      end
    end
  end

  # DBLINKS
  def dblinks
    lines_fetch('DBLINKS')
  end

  # ATOM, BOND -- both fields concatenated into one string.
  def kcf
    atom = get('ATOM')
    bond = get('BOND')
    "#{atom}#{bond}"
  end

end

end

end
113
+
114
+
115
# Demo: when this file is run directly, parse one COMPOUND entry read from
# ARGF and print the result of every accessor with Kernel#p.
if __FILE__ == $0
  cpd = Bio::KEGG::COMPOUND.new(ARGF.read)
  accessors = [
    :entry_id, :names, :name, :formula, :mass, :reactions,
    :rpairs, :pathways, :enzymes, :dblinks, :kcf
  ]
  accessors.each do |accessor|
    p cpd.__send__(accessor)
  end
end
130
+
@@ -0,0 +1,125 @@
1
+ #
2
+ # bio/db/kegg/enzyme.rb - KEGG/ENZYME database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: enzyme.rb,v 0.8 2005/09/08 01:22:11 k Exp $
21
+ #
22
+
23
+ require 'bio/db'
24
+
25
module Bio

class KEGG

# Entry class for the KEGG/ENZYME database.
#
# Accessors delegate to field_fetch / lines_fetch / fetch, all inherited
# through KEGGDB (not part of this file). Plural method names use
# lines_fetch, singular ones use field_fetch.
class ENZYME < KEGGDB

  # Separator string between two entries ("///" on a line of its own).
  RS = "\n///\n"
  # Same string object as RS, under a second name.
  DELIMITER = RS
  # Value handed to the KEGGDB constructor as its second argument.
  TAGSIZE = 12

  # entry:: text of a single ENZYME entry
  def initialize(entry)
    super(entry, TAGSIZE)
  end

  # ENTRY
  def entry_id
    field_fetch('ENTRY')
  end

  # NAME -- all names.
  def names
    lines_fetch('NAME')
  end

  # NAME -- element 0 of #names.
  def name
    names[0]
  end

  # CLASS
  def classes
    lines_fetch('CLASS')
  end

  # SYSNAME
  def sysname
    field_fetch('SYSNAME')
  end

  # REACTION ';'
  def reaction
    field_fetch('REACTION')
  end

  # SUBSTRATE
  def substrates
    lines_fetch('SUBSTRATE')
  end

  # PRODUCT
  def products
    lines_fetch('PRODUCT')
  end

  # COFACTOR
  def cofactors
    lines_fetch('COFACTOR')
  end

  # COMMENT
  def comment
    field_fetch('COMMENT')
  end

  # PATHWAY
  def pathways
    lines_fetch('PATHWAY')
  end

  # GENES
  def genes
    lines_fetch('GENES')
  end

  # DISEASE
  def diseases
    lines_fetch('DISEASE')
  end

  # MOTIF
  def motifs
    lines_fetch('MOTIF')
  end

  # STRUCTURES -- the field with a leading run of "PDB: " prefixes removed,
  # split on whitespace (cached in @data).
  def structures
    @data['STRUCTURES'] ||=
      fetch('STRUCTURES').sub(/(PDB: )*/,'').split(/\s+/)
  end

  # DBLINKS
  def dblinks
    lines_fetch('DBLINKS')
  end

end

end

end
125
+
@@ -0,0 +1,173 @@
1
+ #
2
+ # = bio/db/kegg/expression.rb - KEGG EXPRESSION database class
3
+ #
4
+ # Copyright:: Copyright (C) 2001-2003, 2005
5
+ # Shuichi Kawashima <shuichi@hgc.jp>,
6
+ # Toshiaki Katayama <k@bioruby.org>
7
+ # License:: LGPL
8
+ #
9
+ # $Id: expression.rb,v 1.9 2005/11/05 08:27:26 k Exp $
10
+ #
11
+ #--
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
+ #
27
+ #++
28
+ #
29
+
30
+ require "bio/db"
31
+
32
module Bio
class KEGG

# Parser and simple statistics for a KEGG EXPRESSION data table.
#
# The entry is a tab-separated table. Lines starting with '#' are ignored.
# For every other line the first column is the ORF identifier and columns
# 4-7 are read as four Floats [c0, c1, t0, t1]. Throughout this class the
# "control" value of an ORF is c0 - c1 and the "target" value is t0 - t1
# (presumably signal minus background -- confirm against the KEGG
# EXPRESSION format description).
class EXPRESSION

  # entry:: String holding the complete table.
  #
  # Blank lines and lines too short to contain any intensity column are
  # skipped; previously they raised NoMethodError.
  def initialize(entry)
    @orf2val = Hash.new('')
    @orf2rgb = Hash.new('')
    @orf2ratio = Hash.new('')
    @max_intensity = 10000
    entry.split("\n").each do |line|
      next if /^#/ =~ line
      ary = line.split("\t")
      orf = ary.shift
      cols = ary[2, 4]
      # Array#[] returns nil when the start index lies beyond the end of
      # the array, i.e. the line has no intensity columns at all.
      next if cols.nil?
      @orf2val[orf] = cols.collect {|x| x.to_f}
    end
  end

  # ORF id => [c0, c1, t0, t1]
  attr_reader :orf2val
  # ORF id => "rrgg00" colour string, filled by #val2rgb
  attr_reader :orf2rgb
  # ORF id => log2 ratio, filled by #logy_minus_logx
  attr_reader :orf2ratio
  # Intensity that maps to the brightest colour in #val2rgb
  attr_reader :max_intensity

  # Mean of the control values (NaN for an empty table).
  def control_avg
    difference_mean(0, 1)
  end

  # Mean of the target values (NaN for an empty table).
  def target_avg
    difference_mean(2, 3)
  end

  # Population variance of the control values.
  def control_var
    difference_variance(0, 1, self.control_avg)
  end

  # Population variance of the target values.
  def target_var
    difference_variance(2, 3, self.target_avg)
  end

  # Standard deviation of the control values.
  def control_sd
    Math.sqrt(self.control_var)
  end

  # Standard deviation of the target values.
  def target_sd
    Math.sqrt(self.target_var)
  end

  # Returns [orf, ratio] pairs sorted by descending ratio.
  #
  # Without +threshold+ the first +num+ pairs are returned (slice
  # 0..num-1, so num = 0 yields the whole list). With +threshold+ only the
  # pairs whose ratio is strictly greater than it are returned; +num+ is
  # then ignored.
  def up_regulated(num=20, threshold=nil)
    logy_minus_logx
    ary = @orf2ratio.to_a.sort{|a, b| b[1] <=> a[1]}
    return ary[0..num-1] if threshold.nil?
    leading_run(ary) {|ratio| ratio > threshold}
  end

  # Returns [orf, ratio] pairs sorted by ascending ratio.
  #
  # Same selection rules as #up_regulated, except that with +threshold+
  # the pairs whose ratio is strictly smaller than it are returned.
  def down_regulated(num=20, threshold=nil)
    logy_minus_logx
    ary = @orf2ratio.to_a.sort{|a, b| a[1] <=> b[1]}
    return ary[0..num-1] if threshold.nil?
    leading_run(ary) {|ratio| ratio < threshold}
  end

  # Returns [orf, ratio] pairs sorted by descending absolute ratio.
  #
  # Same selection rules as #up_regulated, except that with +threshold+
  # the pairs whose absolute ratio is strictly greater than it are
  # returned.
  def regulated(num=20, threshold=nil)
    logy_minus_logx
    ary = @orf2ratio.to_a.sort{|a, b| b[1].abs <=> a[1].abs}
    return ary[0..num-1] if threshold.nil?
    leading_run(ary) {|ratio| ratio.abs > threshold}
  end

  # Fills @orf2ratio with log2(target) - log2(control) for every ORF.
  # The formula is kept as a log10 quotient so results stay bit-identical
  # with earlier versions. Math.log10 is undefined for negative input, so
  # control and target are expected to be positive.
  def logy_minus_logx
    @orf2val.each do |k, v|
      @orf2ratio[k] = (1.0/Math.log10(2))*(Math.log10(v[2]-v[3]) - Math.log10(v[0]-v[1]))
    end
  end

  # Fills @orf2rgb with a six-digit hex colour per ORF: the red channel
  # encodes the target value, the green channel the control value, blue is
  # always "00". Channel levels are clamped to 0..255; negative levels used
  # to be passed to "%02x" and produced malformed strings.
  def val2rgb
    # Integer division on purpose (kept from the original implementation).
    col_unit = @max_intensity/255
    @orf2val.each do |k, v|
      g = intensity_hex(v[0] - v[1], col_unit)
      r = intensity_hex(v[2] - v[3], col_unit)
      @orf2rgb[k] = r + g + "00"
    end
  end

  private

  # Mean of v[i] - v[j] over all stored value arrays.
  def difference_mean(i, j)
    sum = 0.0
    @orf2val.values.each do |v|
      sum += v[i] - v[j]
    end
    sum/@orf2val.size
  end

  # Mean squared deviation of v[i] - v[j] from +avg+.
  def difference_variance(i, j, avg)
    sum = 0.0
    @orf2val.values.each do |v|
      tmp = v[i] - v[j]
      sum += (tmp - avg)*(tmp - avg)
    end
    sum/@orf2val.size
  end

  # Returns the leading pairs of +sorted+ whose ratio satisfies the block.
  # Stops at the end of the array and excludes the first pair that fails.
  def leading_run(sorted)
    count = 0
    while count < sorted.size && yield(sorted[count][1])
      count += 1
    end
    sorted[0, count]
  end

  # Two-digit lower-case hex level for +value+, clamped to 0..255.
  def intensity_hex(value, col_unit)
    level = (value/col_unit).to_i
    level = 0 if level < 0
    level = 255 if level > 255
    format("%02x", level)
  end

end # class EXPRESSION

end # class KEGG
end # module Bio