bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # gt2fasta.rb - convert GenBank translations into FASTA format (pep)
4
+ #
5
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This program is free software; you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation; either version 2 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # $Id: gt2fasta.rb,v 0.3 2002/04/15 03:06:17 k Exp $
18
+ #
19
+
20
+ require 'bio/io/flatfile'
21
+ require 'bio/feature'
22
+ require 'bio/db/genbank'
23
+
24
+ include Bio
25
+
# Read GenBank entries from the files named on the command line (or from
# standard input) and print every feature that carries a 'translation'
# qualifier as a FASTA record.
flatfile = FlatFile.new(GenBank, ARGF)

while (entry = flatfile.next_entry)

  # Running count of translated features in this entry; it is appended to
  # the entry id so that each FASTA identifier is distinct.
  orf_count = 0

  entry.features.each do |feature|
    qualifiers = feature.assoc
    translation = qualifiers['translation']
    next unless translation

    orf_count += 1

    # Describe the gene with whichever of these qualifiers are present.
    description = %w[gene product note function].map { |key|
      qualifiers[key]
    }.compact.join(', ')

    definition = "gp:#{entry.entry_id}_#{orf_count} #{description} [#{entry.organism}]"

    # NOTE(review): 70 is presumably the FASTA line width -- confirm against
    # the to_fasta implementation.
    print translation.to_fasta(definition, 70)
  end

end
47
+
data/sample/pmfetch.rb ADDED
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # pmfetch.rb - generate BibTeX format reference list by PubMed ID list
4
+ #
5
+ # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This program is free software; you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation; either version 2 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with this program; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: pmfetch.rb,v 1.2 2002/07/23 04:52:03 k Exp $
22
+ #
23
+
24
+ require 'bio'
25
+
# Output format selection.  A leading "-f FORMAT" option picks the output
# format; 'bibtex' is used when the option (or its value) is absent.
# The test is anchored so that only an argument that *starts* with -f / --f
# is treated as the option.
form = 'bibtex'
if ARGV[0] =~ /\A--?f/
  ARGV.shift
  # A trailing "-f" without a value used to leave form as nil, which made
  # send(nil) raise below; fall back to the default instead.
  form = ARGV.shift || form
end

# Every remaining argument is looked up with Bio::PubMed.query and printed
# in the requested format.
ARGV.each do |id|
  entry = Bio::PubMed.query(id)
  if form == 'medline'
    # 'medline' prints the query result unchanged.
    puts entry
  else
    reference = Bio::MEDLINE.new(entry).reference
    # form comes straight from the command line, so dispatch only to public
    # methods of the reference object (respond_to? ignores private methods
    # such as Kernel#system or Kernel#exit).
    unless reference.respond_to?(form)
      abort "#{$0}: unknown output format '#{form}'"
    end
    puts reference.send(form)
  end
end
42
+
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # pmsearch.rb - generate BibTeX format reference list by PubMed keyword search
4
+ #
5
+ # Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
6
+ #
7
+ # This program is free software; you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation; either version 2 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with this program; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: pmsearch.rb,v 1.2 2002/07/23 04:52:03 k Exp $
22
+ #
23
+
24
+ require 'bio'
25
+
# Output format selection.  A leading "-f FORMAT" option picks the output
# format; 'bibtex' is used when the option (or its value) is absent.
# The test is anchored: the previous unanchored /-f/ also matched a first
# search keyword that merely contained "-f" (e.g. "anti-fungal") and then
# consumed the following keyword as the format name.
form = 'bibtex'
if ARGV[0] =~ /\A--?f/
  ARGV.shift
  # A trailing "-f" without a value used to leave form as nil, which made
  # send(nil) raise below; fall back to the default instead.
  form = ARGV.shift || form
end

# The remaining arguments are joined with spaces into one search string.
entries = Bio::PubMed.search(ARGV.join(' '))
entries.each do |entry|
  if form == 'medline'
    # 'medline' prints each search result unchanged.
    puts entry
  else
    reference = Bio::MEDLINE.new(entry).reference
    # form comes straight from the command line, so dispatch only to public
    # methods of the reference object (respond_to? ignores private methods
    # such as Kernel#system or Kernel#exit).
    unless reference.respond_to?(form)
      abort "#{$0}: unknown output format '#{form}'"
    end
    puts reference.send(form)
  end
end
42
+
@@ -0,0 +1,222 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # psortplot_html.rb - A KEGG API demo script. Generates a HTML file of
4
+ # genes marked by PSORT II predictions onto a
5
+ # KEGG/PATHWAY map.
6
+ #
7
+ # Usage:
8
+ #
9
+ # % ruby psortplot_html.rb
10
+ # % cat sce00010_psort2.html
11
+ # % ruby psortplot_html.rb path:eco00010
12
+ # % cat eco00010_psort2.html
13
+ #
14
+ # Copyright (C) 2005 Mitsuteru C. Nakao <n@bioruby.org>
15
+ #
16
+ # This program is free software; you can redistribute it and/or modify
17
+ # it under the terms of the GNU General Public License as published by
18
+ # the Free Software Foundation; either version 2 of the License, or
19
+ # (at your option) any later version.
20
+ #
21
+ # This program is distributed in the hope that it will be useful,
22
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
23
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24
+ # GNU General Public License for more details.
25
+ #
26
+ # $Id: psortplot_html.rb,v 1.1 2005/10/12 02:10:11 nakao Exp $
27
+ #
28
+
29
+ require 'bio'
30
+
# Helpers that build HTML anchors pointing at the KEGG DBGET web pages.
class KEGG
  DBGET_BASEURI = 'http://kegg.com/dbget-bin'
  WWW_BGET_BASEURI = DBGET_BASEURI + '/www_bget'
  WWW_PATHWAY_BASEURI = DBGET_BASEURI + '/get_pathway'

  # Returns an HTML anchor for a pathway identifier.
  #
  #   path0 := path:sce00010
  #
  # The part after the first ':' (or the whole string when there is no ':')
  # is split into a leading three-character organism code and the remaining
  # map number; these become the org_name and mapno query parameters.  The
  # link text is path0 unchanged.
  def self.link_pathway(path0)
    path = path0.split(':')[1] || path0
    # String#[] with a regexp returns the matched text (or nil).  The former
    # scan(...).to_s depended on Ruby 1.8's Array#to_s joining the elements
    # and yields '[["sce"]]' on Ruby 1.9 and later.
    org_name = path[/\A\w{3}/].to_s
    mapno = path.sub(org_name, '')
    "<a href='#{WWW_PATHWAY_BASEURI}?org_name=#{org_name}&mapno=#{mapno}'>#{path0}</a>"
  end

  # Returns an HTML anchor for an EC number.
  #
  #   ec_num := ec:1.2.3.4
  #
  # A leading "ec:" is removed for the query; the link text is ec_num
  # unchanged.
  def self.link_ec(ec_num)
    ec = ec_num.sub(/^ec:/, '')
    "<a href='#{WWW_BGET_BASEURI}?enzyme+#{ec}'>#{ec_num}</a>"
  end

  # Returns an HTML anchor for a gene identifier.
  #
  #   gene := eco:b0002
  #
  # The identifier is split at ':' into organism and gene name, which are
  # joined with '+' in the query; the link text is gene unchanged.
  def self.link_genes(gene)
    org_name, gene_name = gene.split(':')
    "<a href='#{WWW_BGET_BASEURI}?#{org_name}+#{gene_name}'>#{gene}</a>"
  end
end
58
+
59
+
# Colour table used to paint PSORT II localization predictions.
class PSORT
  # Maps a three-letter localization code to an HTML colour.  The table is
  # frozen because it is a shared constant that is only ever read.
  COLOR_Palette = {
    'csk' => "#FF0000",   # 'cytoskeletal'
    'cyt' => "#FF8000",   # 'cytoplasmic'
    'nuc' => "#FFFF00",   # 'nuclear'
    'mit' => "#80FF00",   # 'mitochondrial'
    'ves' => "#00FF00",   # 'vesicles of secretory system'
    'end' => "#00FF80",   # 'endoplasmic reticulum'
    'gol' => "#00FFFF",   # 'Golgi'
    'vac' => "#0080FF",   # 'vacuolar'
    'pla' => "#0000FF",   # 'plasma membrane'
    'pox' => "#8000FF",   # 'peroxisomal'
    'exc' => "#FF00FF",   # 'extracellular, including cell wall'
    '---' => "#FF0080"    # 'other'
  }.freeze
end
76
+
77
+
78
+
79
+
keggapi = Bio::KEGG::API.new
psort2serv = Bio::PSORT::PSORT2.imsut

# Obtain the list of genes on the specified pathway
# (path:sce00010 when no argument is given).
pathway = ARGV.shift || "path:sce00010"
genes = keggapi.get_genes_by_pathway(pathway)

scl = Hash.new   # gene => predicted protein subcellular localization
ec = Hash.new    # gene => EC numbers

serial = 0
serial_width = genes.size.to_s.size
sync_default = $stdout.sync
$stdout.sync = true   # flush each piece of progress output immediately
begin
  genes.each do |gene|
    serial += 1
    print "#{serial.to_s.rjust(serial_width)}\t#{gene}\t"

    # Obtain the amino acid sequence from the KEGG GENES entry
    aaseq = keggapi.get_aaseqs([gene])

    # Predict the protein subcellular localization
    result = psort2serv.exec(aaseq)
    scl[gene] = result.pred
    print "#{scl[gene]}\t"

    # Obtain the EC numbers from the KEGG GENES entry
    ec[gene] = keggapi.get_enzymes_by_gene(gene)
    puts "#{ec[gene].inspect}"
  end
ensure
  # Restore the previous buffering mode even when one of the calls raises.
  $stdout.sync = sync_default
end


# Colour the KEGG pathway according to each gene's localization: every gene
# gets a red foreground and a background looked up from its prediction.
fg_list = Array.new
bg_list = Array.new

genes.each do |gene|
  fg_list << "#FF0000"
  bg_list << PSORT::COLOR_Palette[scl[gene]]
end

url = keggapi.color_pathway_by_objects(pathway, genes, fg_list, bg_list)
puts "#{url} downloaded."

# remove "path:" prefix from pathway_id
path_code = pathway.sub(/^path:/, '')

# save the result image
image_file = "#{path_code}_psort2.gif"
keggapi.save_image(url, image_file)


# Create the HTML page: heading, gene table, colour palette, pathway image.
# The heading previously contained a stray <li> and an unclosed
# <a href=""> wrapped around the pathway link (nested anchors); both are
# removed here.
html = <<END
<html>
<head>
<title>PSORT II prediction protein subcellular localization map of KEGG/PATHWAY (#{pathway})</title>
<style>
table { border-collapse: collapse; }
td { border: 1px solid black; padding: 5px; }
td.outer { border: none; vertical-align: top; }
</style>
</head>
<body>
<h1><a href="http://psort.ims.u-tokyo.ac.jp/helpwww2.html">PSORT II</a> prediction protein subcellular localization map of <a href="http://kegg.com/kegg/pathway.html">KEGG/PATHWAY</a> (#{KEGG.link_pathway(pathway)})</h1>

<table>
<tr>
<td class=outer>
<table>
  <tr>
    <th></th>
    <th>EC</th>
    <th>Gene</th>
    <th>Localization</th>
  </tr>
END


# Generate the gene table with localization: one row per (EC number, gene)
# pair, EC numbers in sorted order.
names = Bio::PSORT::PSORT2::SclNames
multi_genes = Hash.new(0)   # gene => number of rows emitted for it so far

ec.values.flatten.sort.uniq.each do |ec_num|
  ec.each do |gene, ec_numbers|
    next unless ec_numbers.include?(ec_num)

    loc = scl[gene]
    color = PSORT::COLOR_Palette[loc]
    name = names[loc]
    multi_genes[gene] += 1

    html << <<END
  <tr>
    <td>#{multi_genes[gene]}</td>
    <td>#{KEGG.link_ec(ec_num)}</td>
    <td>#{KEGG.link_genes(gene)}</td>
    <td bgcolor="#{color}">#{name}</td>
  </tr>
END
  end
end

html << <<END
</table>
</td>
<td class=outer>
<table>
  <tr>
    <th>Code</th>
    <th>Color</th>
  </tr>
END

# Generate the colour code table as well, sorted by localization code.
PSORT::COLOR_Palette.sort.each do |code, color|
  html << <<END
  <tr>
    <td>#{code}</td>
    <td bgcolor="#{color}">#{names[code]}</td>
  </tr>
END
end

html << <<END
</table>
</td>
</tr>
</table>
<br>
<img src="#{image_file}">
</body>
</html>
END

# save generated HTML file
html_file = "#{path_code}_psort2.html"
File.open(html_file, "w+") do |file|
  file.puts html
end

puts "Open #{html_file}"
@@ -0,0 +1,96 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # ssearch2tab.rb - convert SSEARCH output into tab delimited data for MySQL
4
+ #
5
+ # Usage:
6
+ #
7
+ # % ssearch2tab.rb SSEARCH-output-file[s] > ssearch_results.tab
8
+ # % mysql < ssearch_results.sql (use sample at the end of this file)
9
+ #
10
+ # Format accepted:
11
+ #
12
+ # % ssearch3[3][_t] -Q -H -m 6 query.f target.f > SSEARCH-output-file
13
+ #
14
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
15
+ #
16
+ # This program is free software; you can redistribute it and/or modify
17
+ # it under the terms of the GNU General Public License as published by
18
+ # the Free Software Foundation; either version 2 of the License, or
19
+ # (at your option) any later version.
20
+ #
21
+ # This program is distributed in the hope that it will be useful,
22
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
23
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24
+ # GNU General Public License for more details.
25
+ #
26
+ # $Id: ssearch2tab.rb,v 0.1 2001/06/21 08:25:58 katayama Exp $
27
+ #
28
+
# Convert SSEARCH reports read from the files named on the command line (or
# from standard input) into one tab-delimited line per hit:
#
#   file, q_len, target, t_len, swopt, zscore, bits, evalue,
#   sw, ident, ugident, overlap, lap
q_len = nil

while line = gets

  # query: remember the query length for the hits that follow
  if line =~ /^\S+: (\d+) aa$/
    q_len = $1
  end

  # each hit
  if line =~ /^>>([^>]\S+).*\((\d+) aa\)$/
    target = $1
    t_len = $2

    # The two lines after the hit header carry the scores.  Stop cleanly on
    # a report that ends in the middle of a hit instead of calling split on
    # nil.
    score_line = gets
    align_line = gets
    break if score_line.nil? || align_line.nil?

    # Pick the wanted whitespace-separated fields by position; the other
    # fields are labels or values that are not exported.
    swopt, zscore, bits, evalue =
      score_line.split(/\s+/).values_at(3, 5, 7, 9)
    sw, ident, ugident, overlap, lap =
      align_line.split(/\s+/).values_at(2, 3, 5, 8, 11)

    # query-hit pair
    print "#{$FILENAME}\t#{q_len}\t#{target}\t#{t_len}"

    # print values
    [swopt, zscore, bits, evalue, sw, ident, ugident, overlap, lap].each do |value|
      # Keep only digits, '.', ':', 'e' and '-'.  to_s turns a missing field
      # into an empty column instead of raising NoMethodError on nil.
      print "\t#{value.to_s.tr('^0-9.:e\-', '')}"
    end

    print "\n"

  end
end
73
+
74
+ =begin MySQL ssearch_results.sql sample
75
+
76
+ CREATE DATABASE IF NOT EXISTS db_name;
77
+ CREATE TABLE IF NOT EXISTS db_name.table_name (
78
+ query varchar(25) not NULL,
79
+ q_len integer unsigned default 0,
80
+ target varchar(25) not NULL,
81
+ t_len integer unsigned default 0,
82
+ swopt integer unsigned default 0,
83
+ zscore float default 0.0,
84
+ bits float default 0.0,
85
+ evalue float default 0.0,
86
+ sw integer unsigned default 0,
87
+ ident float default 0.0,
88
+ ugident float default 0.0,
89
+ overlap integer unsigned default 0,
90
+ lap_at varchar(25) default NULL
91
+ );
92
+ LOAD DATA LOCAL INFILE 'ssearch_results.tab' INTO TABLE db_name.table_name;
93
+
94
+ =end
95
+
96
+