bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
data/doc/Design.rd.ja ADDED
@@ -0,0 +1,341 @@
1
+ =begin
2
+
3
+ $Id: Design.rd.ja,v 1.7 2002/07/02 01:40:24 k Exp $
4
+
5
+ Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
6
+
7
+ = BioRuby �γ�ȯ���ˤȥǥ�����
8
+
9
+ �������⥸�塼����ɲä������ CVS �ΰʲ��Υ�ݥ��ȥ�ˡ����Ƥ˽��ä�
10
+ Ŭ�ڤ� commit ���ޤ����᡼��󥰥ꥹ�Ȥʤɤ� contribute ���줿�����ɤ�
11
+ �ܿͤ� committer �ˤʤäƤ�餦���������åդ� commit ���ޤ���
12
+
13
+ bioruby/
14
+ |-- README �Ϥ����
15
+ |-- install.rb ���󥹥ȡ���
16
+ |-- COPYING
17
+ |-- COPYING.LIB
18
+ |-- bin/ ���ץꥱ�������
19
+ |-- lib/ Ruby �ǽ񤫤줿�饤�֥��
20
+ | `-- bio/ ����ʤɴ���Ū�ʥ��饹
21
+ | |-- data/ ��ʪ��Ū������ʤɥǡ������Τ��
22
+ | |-- db/ �Ƽ�ǡ����١����ѡ���
23
+ | |-- io/ �ǡ�����������
24
+ | `-- appl/ �������ץ�ν���
25
+ |-- doc/ �ɥ������
26
+ |-- ext/ C �ǽ񤫤줿�饤�֥��
27
+ `-- sample/ ����ץ륳����
28
+
29
+ == Ruby ��ʬ�ˤĤ��ƤΥǥ������bioruby/lib/bio/ �ʲ���
30
+
31
+ ����ޤǤ˹Ԥʤ�줿�����Υǥ����󥳥󥻥ץȤȡ���������ˤʤɤˤĤ��Ƥ�
32
+ �Ȥ�Ƥ����ޤ���
33
+
34
+ BioRuby �������饹�� module Bio �ǰϤळ�Ȥǡ�̾�����֤��ڤ�ʬ������ˤ�
35
+ �ޤ���
36
+
37
+ === lib/bio/ �ǡ�����¤��
38
+
39
+ ����䥢�ߥλ�������ʸ�����󡢰����Ҥ� location �� feature������դʤ�
40
+ ����ݥǡ�����¤�򰷤����饹�� lib/bio/ ľ�������֤��Ƥ��ޤ��������ϳ�
41
+ �ǡ����١������ѡ����ʤɤ����ľ�ܸƤФ�ޤ���
42
+
43
+ ¾�ˡ����饤���ȥ��饹�ʤɤμ�����ɬ�פ����Τ�ޤ��󡣤ޤ�����ʪ��η�
44
+ ��ʬ���ɽ�� Taxonomy ���饹�� GeneOntology �ʤɤΥ��饹�⤢�����������
45
+ �Τ�ޤ���
46
+
47
+ === lib/bio/db/ �ǡ����١������ѡ�����
48
+
49
+ �Ȥ����δ�ñ�� vs ������ʣ�������������ᤵ vs ����νŤ����ʤɥХ��
50
+ �������ɬ�������ɤ��Ȥϸ����ʤ����⤢��ޤ�����lib/bio/db.rb �Ǥϰʲ�
51
+ �Τ褦���ȼ��Υ����ǥ��ʤ��Ǥ�Ʊ�ͤʻ�ߤ��ʤ���Ƥ������ɤ�����Ĵ�٤Ƥ�
52
+ �ޤ���ˤ���Ѥ��Ƥ��ޤ����������ѡ������ɲä�����⻲�ͤˤ��Ʋ�������
53
+
54
+ ����ȥ� ���줾��Υǡ����١����ˤ����룱�ĤΥǡ���ñ��
55
+ ��) GenBank �ʤ� LOCUS ���� // �ޤǤ����Ƥι�
56
+ �ǥ�ߥ� ����ȥ�ȥ���ȥ����ڤ�ʸ����
57
+ ��) GenBank �ʤ� // �����ι�
58
+ ���� ����ȥ�κ�ü�������ʤɤ˵������ȥåץ�٥�μ��̻�
59
+ ��) GenBank �ʤ� DEFINITION �� FEATURES �ʤ�
60
+ �ե������ ������˰�̣�Τ���ñ�̤�ʤ�ʣ���Ԥ���ʤ�֥��å�
61
+ ��) GenBank �ʤ� SOURCE �˴ޤޤ�� taxonomy �ޤǤι������ʤ�
62
+
63
+ BioRuby �Υǡ����١������ѡ����ϡ�������ȥ�ʬ��ʸ����� .new ���Ϥ�����
64
+ �ѡ���������̤��ݻ����륤�󥹥��󥹤��֤��ǥ�����ˤʤäƤ��ޤ����ƥǡ�
65
+ ���١������饨��ȥ�ñ�̤Υǡ�����������ˡ�ϡ��긵�Υե�åȥե�������
66
+ ����硢�ͥåȱۤ��� DBGET �� BioFetch ������硢NCBI �����äƤ�����硢
67
+ �ʤɻȤ����ˤ�ä��͡��Ǥ������ɤ����饨��ȥ�������� (io) �˴ؤ�餺��
68
+ �ѡ����ϥ���ȥ��ѡ��������������ǰ�Ǥ��ޤ���
69
+
70
+ �ǽ�Ū�ˤ����ƤΥǡ����Ϥ��餫����ѡ������� tab �ڤ�Υǡ������ڤ��ߡ�
71
+ MySQL �ʤɤ��ͤù���Ǥ��ޤäƥѡ�����ɬ�פ��ʤ��褦�������ˤ������Ǥ���
72
+ BioPerl �� BioPerl-DB �˻��Ƥ���Ȼפ��ޤ�����BioRuby ɸ��� DataBase ��
73
+ �������Ȥ� BioRuby-DB (class BRDB) �Ȥ��Ƴ�ȯ��ʤ�Ƥ��ޤ���BioRuby ��
74
+ ���������Ȥ� BioHackathon �� BOSC �˻��ä��� OBDA �� BioSQL �ʤɤ˽��
75
+ ��褦�ˤʤ�ޤ����Τǡ�������ȼ��ǡ����١�����ɬ�פ���ʬ�ϸ��äƤ�����
76
+ �⤷��ޤ���
77
+
78
+ * OBDA : Open Bio Sequence Database Access
79
+ * ((<URL:http://obda.open-bio.org/>))
80
+ * ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/?cvsroot=obf-common>))
81
+ * BOSC : Bioinformatics Open Source Conference
82
+ * ((<URL:http://bosc.open-bio.org/>))
83
+
84
+ + �ǡ����١�����¤����ݲ�
85
+
86
+ lib/bio/db/ �ʲ��Υ⥸�塼��ϡ�lib/bio/db.rb �� require �������ʤ��Ȥ�
87
+ DB ���饹��Ѿ����ޤ�������ˡ������Ĥ��Υǡ����١���������ι�¤�����
88
+ �Ƥ��뤿�ᡢ�Ȥꤢ���� NCBI ����KEGG ��ޤ�ˡ�EMBL ����ʬ���ơ��ѡ�����
89
+ �ݤ˶��̤ǻȤ���᥽�åɤ�ͭ���褦�Ȥ��Ƥ��ޤ������������ºݤˤϤ����
90
+ �η��������ƤϤޤ�ʤ��ǡ����١�����¿���Τǡ�DB ������Ѿ�������⤢
91
+ ��Ǥ��礦��
92
+
93
+ + �ե�����ɤ��Ф��붦�̤� API �����
94
+
95
+ �͡��ʥǡ����١����Ǥ褯���̤��Ƹ����褦�ʥե�����ɤ��Ф��Ƥϡ�db.rb
96
+ ��ˤ���ɥ�����ȤǼ�����붦�̤Υ᥽�å�̾��Ȥäơ��ǡ����������
97
+ ��褦�ˤ��ޤ���
98
+
99
+ ���Τ褦�ˡ��ե�����ɤ��Ф��붦�̤Υ᥽�å�̾�����ƥǡ����١����Υѡ���
100
+ ��Ŭ�ڤ˼�������Ƥ���С��ǡ����١�����˥᥽�åɤ�Ф���ɬ�פ�����ޤ���
101
+ �㤨�С��ǡ����١������ۤʤäƤ�֥���ȥ�������ե�����ɡ׼����Τ����
102
+ �᥽�åɤ� definition �������ȿ�¬���䤹���ʤ�ޤ���
103
+
104
+ + ɬ�פʻ��ޤǥѡ������٤餻���on-demand parsing��
105
+
106
+ GenBank �ʤ�ʣ���ʹ�¤�Υǡ����١�����ѡ�������ݤˤ⥹�롼�ץåȤ��ɤ�
107
+ ���뤿�ᡢ�ǽ�ϥե����������ڤ�ʬ����������������Ԥʤ�������Υե���
108
+ �����Υǡ������׵᤹��᥽�åɤ��ƤФ줿�������ºݤ˥ե��������Υǡ�
109
+ ����٤����ѡ������������Ԥʤ��ޤ���������󤳤λ��ѡ��������Τϡ�ɬ
110
+ �פʥե�����ɤ����Ǥ���
111
+
112
+ + ���٥ѡ���������̤ϥ���å��夷�Ƥ���
113
+
114
+ �嵭�� on-demand parsing �ǥѡ���������̤ϡ����֥���������˥���å���
115
+ ���Ƥ����Τǡ������ܰʹߤΥ᥽�åɥ�����Ǥ��ݻ����Ƥ���ѡ����ѤߤΥǡ�
116
+ �����֤��ޤ�������ˤ��Ť��ѡ����򷫤��֤��褦�ʥ����С��إåɤ�̵����
117
+ �Ƥ��ޤ���
118
+
119
+ === lib/bio/data/ ������ǡ�����
120
+
121
+ lib/bio/data/ �ʲ��ˡ������Ĥ��Υǡ����ʥ��ߥλ���̾����KEGG �Ǥ���ʪ��
122
+ ̾�����ɥ�ɽ�ʤɡˤ�����Ȥ����֤���Ƥ��ޤ���
123
+
124
+ Bio::AminoAcid �� ���ߥλ���̾����ɽ���ϥå��塢�ʤ�
125
+
126
+ �����ϡ�ɬ�פʥ��饹�˥��������᥽�åɤ�������Ƥ��ɤ��Ǥ��礦��
127
+
128
+ === lib/bio/io/ ���󥿡��ե�������IO ��
129
+
130
+ lib/bio/io/ �ʲ��ˤϡ��ե�åȥե���������򥨥�ȥ�ñ�̤��ɤ߹��९�饹��
131
+ ���Υ�ͥåȤ���ǡ���������Ԥʤ� DBGET ���饹�� NCBI �� PubMed �ˤ��
132
+ MEDLINE �ǡ���������Ԥʤ� PubMed ���饹��OBDA �˽�򤷤� BioRegistry,
133
+ BioFetch, BioSQL �ʤɤˤ��ǡ���������Ԥ����饹�ʤɤ�����ޤ���
134
+
135
+ * flatfile.rb �ʼ����ѡ�
136
+
137
+ # ��������Υե������ GenBank �ե����ޥåȤȤ��Ƴ���
138
+ flatfile = Bio::FlatFile.open(Bio::GenBank, "genbank/gbest40.seq")
139
+
140
+ # �ޤ��ϥե�����ϰ�����Ϳ����
141
+ Bio::FlatFile.new(Bio::GenBank, ARGF)
142
+
143
+ # �ޤ��� IO ��Ϳ����
144
+ Bio::FlatFile.new(Bio::GenBank, IO.popen("gzip -dc nc1101.flat.gz"))
145
+
146
+ # �ǽ�Υ���ȥ�� GenBank ���֥������Ȥ�
147
+ gb = flatfile.next_entry
148
+
149
+ # ����ȥꤴ�Ȥ� Bio::GenBank ���֥������Ȥ�����
150
+ flatfile.each do |gb|
151
+ puts gb.definition
152
+ end
153
+
154
+ === lib/bio/appl/ �ġ����
155
+
156
+ FASTA(SSEARCH), [PSI-]BLAST, HMMER, CLUSTALW �ʤɤγ������ץ��Ȥä���
157
+ ����Ԥ��ޤ�����������ʼ¹ԡ�http �ʤɤˤ���⡼�ȼ¹ԡ���̤Υե���
158
+ �ޥåȤʤɤ�ռ������ˡ���̤��ѡ��������Ǽ���줿 Report ���֥������Ȥ�
159
+ �ɤ��֤�褦�ʴ����ˤ��Ƥ����ޤ���
160
+
161
+ * query �Ȥʤ륪�֥������Ȥˡ����ץ��¹Ԥ���᥽�åɤ������ɬ�פʰ���
162
+ ��Ϳ�����
163
+
164
+ factory = Bio::Fasta.remote('fasta', 'genes')
165
+ fa_res = f.fasta(factory) # f �� Bio::FastaFormat ���֥�������
166
+ fa_res = seq.fasta(factory) # seq �� Bio::Sequence::AA ���֥������Ȥʤ�
167
+
168
+ * ���������������ե����ȥ�ˡ������С�query��target �ʤɤ����ꤷ�ơ�����
169
+ ���¹Ԥ���᥽�åɤ�Ƥ�
170
+
171
+ factory = Bio::Fasta.local(prog, target, opt)
172
+ fa_res = factory.query(seq)
173
+
174
+ �ʤɤ����Ĥ��μ�����ˡ���ͤ����ޤ���Bio::Fasta, Bio::Blast �Ǥ�ξ����
175
+ ����ץꤷ�Ƥ��ޤ��ˡ�
176
+
177
+ �ե����ȥ��᥽�åɤؤΥ��ץ������Ϥ����ϡ�ʣ����ʪ�ϥ���ܥ�򥭡���
178
+ ���� hash �ˤ��������ɤ����⤷��ޤ��󡣤⤷����̾���Ĥ������Ԥ���
179
+
180
+ res = a.query(:hoge => fuga, :hoge2 => fuga2)
181
+
182
+ ����¾�Υ��ץ�Υ��󥿡��ե�������������ʤ�����ͤ���Ƥ��������Ȼ�
183
+ ���ޤ���
184
+
185
+ == �����ǥ��󥰥�������
186
+
187
+ ���饹�߷ס��᥽�å�����ʤ����̤ˤ����ơ�KISS (keep it simple stupid) ��
188
+ ���ܤǤ������������ʤ�Ǥ⤫��Ǥ� class ��ʬ�򤷤Ƥ��ޤ����ɤ��櫓�Ǥ�
189
+ �ʤ��Ǥ��礦����ǰ�Ȥ��ư�ĤΤޤȤޤ���Ȼפ����Τޤ�ʬ�򤹤�Ф褯��
190
+ �ä˻Ȥ��󤷤Τ����ʤ����֥������Ȥ���������ɬ�פϤʤ��Ȼפ��ޤ���
191
+
192
+ ����Ū�ʻ��ˤĤ��Ƥϡ���˲�����;�Ϥ�����Ȼפ��ޤ����񤭴�������ˤ��
193
+ ����ץ��ʬ����䤹���ʤ��硢����γ�ȯ�����ࡼ���ˤʤ�Ȼפ�����
194
+ �ˤϡ��ɤ�ɤ������Ƥ����ޤ����դˡ�¿����Ψ���ɤ��ʤ�Ȥ��Ƥⲿ����
195
+ �Ƥ��뤫ʬ����ˤ����ʤ��硢ɬ�������⤤�Ȼפ����Ѷ�Ū����ͳ�����Ĥ���
196
+ �ʤ����Ϻ��Ѥ򸫹�碌�뤳�Ȥ⤢��Ȼפ��ޤ���������ˤ��Ƥ⡢�С�����
197
+ �� 1.0 �ޤǤϲ��̸ߴ��������ˤ����ѹ���Ԥʤ�ͽ��Ǥ���
198
+
199
+ ��������������ˤ����ܸ�ʤ��� ASCII ��ʸ����ϴޤޤʤ��褦�ˤ��ޤ���
200
+
201
+ === �إå���
202
+
203
+ �إå����ˤϡ��ե�����̾�����ס�����ԡ��饤���󥹡�CVS �� ID ��񤭤ޤ�
204
+ �ʲ�����Ǥ� ID �Ρ��Ÿ������ʤ��褦�����Ѥˤ��Ƥ��ޤ������ºݤ� $ �Ǥ��ˡ�
205
+
206
+ #
207
+ # bio/hoge.rb - biological hoge class
208
+ #
209
+ # Copyright (C) 2000, 2001 KATAYAMA Toshiaki <k@bioruby.org>
210
+ #
211
+ # This library is free software; you can redistribute it and/or
212
+ # modify it under the terms of the GNU Lesser General Public
213
+ # License as published by the Free Software Foundation; either
214
+ # version 2 of the License, or (at your option) any later version.
215
+ #
216
+ # This library is distributed in the hope that it will be useful,
217
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
218
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
219
+ # Lesser General Public License for more details.
220
+ #
221
+ # You should have received a copy of the GNU Lesser General Public
222
+ # License along with this library; if not, write to the Free Software
223
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
224
+ #
225
+ # ��Id:��
226
+ #
227
+
228
+ ���ץꥱ�������䥵��ץ륳���ɤ�񤯾��ϡ�ruby �Υѥ��˰�¸���ʤ���
229
+ ���ˣ����ܤ�
230
+
231
+ #!/usr/bin/env ruby
232
+
233
+ �Τ褦�˻Ϥ��Τ��ɤ��Ǥ��礦��
234
+
235
+ === ����
236
+
237
+ ���Τ� module Bio �dz�ꡢɬ�פʥ��饹��������ޤ���������Ǥ� # ��Ȥ�
238
+ �ƥ����Ȥ�Ĥ���RD ��Ȥä������Ȥϥե�����κǸ�Υɥ����������
239
+ �ǤϽФƤ��ʤ��褦�ˤ��ޤ����⤷�������륯�饹�� bio/db/ �ʲ��Υǡ����١�
240
+ ���ѡ����Ϥ��ä��顢bio/db.rb �Υɥ�����Ȥ⻲�Ȥ��Ʋ�������
241
+
242
+ require 'foo/bar'
243
+
244
+ module Bio
245
+
246
+ class Hoge
247
+
248
+ # this method does hogehoge
249
+ def hoge(fuga)
250
+ @fuga = fuga # storing data fuga
251
+ end
252
+
253
+ def gege
254
+ end
255
+
256
+ end
257
+
258
+ end
259
+
260
+ RDoc ��Ȥ��Х��饹��᥽�åɤξ�˽񤫤줿�����Ȥ��� HTML �ʤɤ�����
261
+ �Ǥ���褦�Ǥ������� ri �Ѥν��Ϥ�Ǥ���褦�ˤʤäƤ����顢���Ѥ��뤫��
262
+ ����ޤ���
263
+
264
+ === �ƥ��ȥ�������
265
+
266
+ �ƥ��ȥ����ɤϥ��饹����ʤ�������ʬ�Τ��Ȥˤ�Ĥ��뤳�Ȥˤ��ޤ����ƥ���
267
+ �����ɤν����Ϥ��줫�鸡Ƥ���Ƥ���ɬ�פ�����ޤ������Ȥꤢ�����ʲ��Τ�
268
+ ���ʹ�ʸ�ǥƥ��ȥ����ɤ�Ϥߤޤ���
269
+
270
+ if __FILE__ == $0
271
+ # test code here
272
+ end
273
+
274
+ ���Τ��ޤ��ʤ��Τ褦�ʹ�ʸ�ϡ��ե����� hoge.rb �����ޥ�ɥ饤�󤫤�
275
+
276
+ % ruby hoge.rb
277
+
278
+ �ʤɤȥ�����ץȤȤ��Ƶ�ư���줿���˼¹Ԥ���ޤ������饤�֥��Ȥ���
279
+
280
+ require 'hoge'
281
+
282
+ �Τ褦�˥����ɤ��줿���ˤϼ¹Ԥ���ʤ��褦�ˤ��뤿��ξ����Ǥ���
283
+
284
+ �ƥ��Ȥ���ˡ�ϡ�Test::Unit �� ruby ��ɸ��饤�֥��Ȥ���ź�դ����褦
285
+ �ˤʤ�к��Ѥ��Ƥ⤤���Ȼפ��ޤ���������ޤǤϿ���������Ȼפ��ޤ�����
286
+ ���Ȥ˳����ե������ͥåȥ���ʤɤ�ɬ�פʾ��ɤ����뤫�Ȥ������󥹥ȡ�
287
+ ������ư���ǧ�Ȥ��ƥƥ��Ȥ�¹Ԥ�����ˡ�ʤɤ⸡Ƥ����ɬ�פ�����ޤ���
288
+
289
+ === �ɥ��������
290
+
291
+ ����ޤǡ��ƥ��饹�ˤĤ��Ƥϡ����ޤ�ɥ�����Ȳ�����Ƥ��ޤ���Ǥ�������
292
+ ����ϥƥ��ȥ����ɤΤ��ȡ��ե�����κǸ����� RD �ǥ᥽�åɤʤɤΥɥ����
293
+ ��Ȥ��ɲä��뤳�Ȥˤ��Ƥ����ޤ���
294
+
295
+ =begin
296
+
297
+ = Bio::Hoge
298
+
299
+ Hoge ���饹�γ���������
300
+
301
+ --- Bio::Hoge.new(fuga)
302
+
303
+ ���饹�᥽�åɤ�������
304
+
305
+ --- Bio::Hoge#to_a
306
+
307
+ �᥽�åɤ�������
308
+
309
+ == Bio::Hoge::Fuga
310
+
311
+ �������饹 Hoge::Fuga �γ�������
312
+
313
+ --- Bio::Hoge::Fuga.new(fuga)
314
+
315
+ ���饹�᥽�åɤ�������
316
+
317
+ --- Bio::Hoge::Fuga#to_a
318
+
319
+ �᥽�åɤ�������
320
+
321
+ =end
322
+
323
+ ��������
324
+
325
+ * ����������ˤ� ASCII �ʳ���ʸ��������ʤ����ˤΤ��ᡢ���ܸ�Υɥ����
326
+ ��Ȥϡ�������ʬ�ǤϤʤ��̥ե�����˵��Ҥ���doc/ �ǥ��쥯�ȥ�ʲ�����
327
+ ���ʤɤ��ޤ������κݡ���ĥ�Ҥ� .ja ���դ��ơ��ե�����̾��������ܸ��
328
+ ʬ����褦�ˤ��ޤ���
329
+
330
+ * ���ҤΤ褦�ˡ�������������������Ǥ� RD �ˤ��ɥ�����Ȥ϶ػߤ��ޤ���
331
+ ��ͳ�Ͻ��˥����ɤ��ɤߤŤ餯�ʤ뤫��Ǥ������������ # ��Ȥä�ɬ��
332
+ �ʥ����Ȥ��ɲä�����ϴ��ޤ��ޤ���(���ҤΤ褦�� RDoc ���ˤ��뤫��
333
+ ����ޤ���)
334
+
335
+ == ��ȯ�˻��ä���ˤ�
336
+
337
+ BioRuby �Υ����֥����� ((<URL:http://bioruby.org>)) �򸫤ơ��᡼��󥰥�
338
+ ���Ȥ˻��ä��뤫�������å� <staff@bioruby.org> ��Ϣ�����Ƥ���������
339
+
340
+ =end
341
+
data/doc/KEGG_API.rd ADDED
@@ -0,0 +1,1437 @@
1
+ =begin
2
+
3
+ $Id: KEGG_API.rd,v 1.1 2005/08/31 13:29:01 k Exp $
4
+
5
+ Copyright (C) 2003-2005 Toshiaki Katayama <k@bioruby.org>
6
+
7
+ = KEGG API
8
+
9
+ KEGG API is a web service to use the KEGG system from your program via
10
+ SOAP/WSDL.
11
+
12
+ We have been making the ((<KEGG|URL:/kegg/>)) system available at
13
+ ((<GenomeNet|URL:/>)). KEGG is a suite of databases including GENES,
14
+ SSDB, PATHWAY, LIGAND, LinkDB, etc. for genome research and related
15
+ research areas in molecular and cellular biology. These databases and
16
+ associated computation services are available via WWW and the user
17
+ interfaces are built on web browsers. Thus, the interfaces are
18
+ designed to be accessed by humans, not by machines, which means that
19
+ it is troublesome for the researchers who want to use KEGG in an
20
+ automated manner. Besides, from the database developer's side, it is
21
+ impossible to prepare all the CGI programs that satisfy a variety of
22
+ users' needs.
23
+
24
+ In recent years, the Internet technology for
25
+ application-to-application communication referred to as the
26
+ ((<web service|URL:http://www.oreillynet.com/lpt/a/webservices/2002/02/12/webservicefaqs.html>))
27
+ is improving at a rapid rate. For example, Google, a popular Internet
28
+ search engine, provides the web service called the
29
+ ((<Google Web API|URL:http://www.google.com/apis/>)).
30
+ The service enables users to
31
+ develop software that accesses and manipulates a massive amount of web
32
+ documents that are constantly refreshed. In the field of genome
33
+ research, a similar kind of web service called
34
+ ((<DAS|URL:http://www.biodas.org/>)) (distributed annotation system)
35
+ has been used on several web sites, including
36
+ ((<Ensembl|URL:http://www.ensembl.org/>)),
37
+ ((<Wormbase|URL:http://www.wormbase.org/>)),
38
+ ((<Flybase|URL:http://www.flybase.org/>)),
39
+ ((<SGD|URL:http://www.yeastgenome.org/>)),
40
+ ((<TIGR|URL:http://www.tigr.org/>)).
41
+
42
+ With the background and the trends noted above, we have started developing
43
+ a new web service called KEGG API using
44
+ ((<SOAP|URL:http://www.w3.org/TR/SOAP/>)) and
45
+ ((<WSDL|URL:http://www.w3.org/TR/wsdl20/>)).
46
+ The service has been tested with
47
+ ((<Ruby|URL:http://www.ruby-lang.org/>))
48
+ (Ruby 1.8.2 or Ruby 1.6.8 with
49
+ ((<SOAP4R|URL:http://raa.ruby-lang.org/project/soap4r/>))
50
+ version 1.4.8.1) and
51
+ ((<Perl|URL:http://www.perl.org/>))
52
+ (((<SOAP::Lite|URL:http://www.soaplite.com/>)) version 0.55) languages.
53
+ Although the service has not been tested with clients written in other
54
+ languages, it should work if the language can treat SOAP/WSDL.
55
+
56
+ The ((<BioRuby|URL:http://bioruby.org/>)) project prepared a Ruby
57
+ library to handle the KEGG API, so users of the Ruby language should
58
+ check out the latest release of the BioRuby distribution.
59
+
60
+ For the general information on KEGG API, see the following
61
+ page at GenomeNet:
62
+
63
+ * ((<URL:http://www.genome.jp/kegg/soap/>))
64
+
65
+ == Table of contents
66
+
67
+ * ((<Introduction>))
68
+ * ((<KEGG API Quick Start>))
69
+ * ((<Quick Start with Perl>))
70
+ * ((<Perl FAQ>))
71
+ * ((<Quick Start with Ruby>))
72
+ * ((<Quick Start with Python>))
73
+ * ((<Quick Start with Java>))
74
+ * ((<KEGG API Reference>))
75
+ * ((<WSDL file>))
76
+ * ((<Terminology>))
77
+ * ((<Returned values>))
78
+ * ((<SSDBRelation>)), ((<ArrayOfSSDBRelation>))
79
+ * ((<MotifResult>)), ((<ArrayOfMotifResult>))
80
+ * ((<Definition>)), ((<ArrayOfDefinition>))
81
+ * ((<LinkDBRelation>)), ((<ArrayOfLinkDBRelation>))
82
+ * ((<Methods>))
83
+ * ((<Meta information>))
84
+ * ((<list_databases>)),
85
+ ((<list_organisms>)),
86
+ ((<list_pathways>))
87
+ * ((<DBGET>))
88
+ * ((<binfo>)),
89
+ ((<bfind>)),
90
+ ((<bget>)),
91
+ ((<btit>))
92
+ * ((<LinkDB>))
93
+ * ((<get_linkdb_by_entry>))
94
+ * ((<SSDB>))
95
+ * ((<get_best_best_neighbors_by_gene>)),
96
+ ((<get_best_neighbors_by_gene>)),
97
+ ((<get_reverse_best_neighbors_by_gene>)),
98
+ ((<get_paralogs_by_gene>))
99
+ # * ((<get_neighbors_by_gene>)),
100
+ # ((<get_similarity_between_genes>))
101
+ * ((<Motif>))
102
+ * ((<get_motifs_by_gene>)),
103
+ ((<get_genes_by_motifs>))
104
+ * ((<KO, OC, PC>))
105
+ * ((<get_ko_by_gene>)),
106
+ ((<get_ko_by_ko_class>)),
107
+ ((<get_genes_by_ko_class>)),
108
+ ((<get_genes_by_ko>)),
109
+ ((<get_oc_members_by_gene>)),
110
+ ((<get_pc_members_by_gene>))
111
+ # ((<get_ko_members>)),
112
+ * ((<PATHWAY>))
113
+ * ((<mark_pathway_by_objects>)),
114
+ ((<color_pathway_by_objects>)),
115
+ ((<get_html_of_marked_pathway_by_objects>)),
116
+ ((<get_html_of_colored_pathway_by_objects>))
117
+ * ((<get_genes_by_pathway>)),
118
+ ((<get_enzymes_by_pathway>)),
119
+ ((<get_compounds_by_pathway>)),
120
+ ((<get_glycans_by_pathway>)),
121
+ ((<get_reactions_by_pathway>)),
122
+ ((<get_kos_by_pathway>))
123
+ * ((<get_pathways_by_genes>)),
124
+ ((<get_pathways_by_enzymes>)),
125
+ ((<get_pathways_by_compounds>)),
126
+ ((<get_pathways_by_glycans>)),
127
+ ((<get_pathways_by_reactions>)),
128
+ ((<get_pathways_by_kos>))
129
+ * ((<get_linked_pathways>))
130
+ * ((<get_genes_by_enzyme>)),
131
+ ((<get_enzymes_by_gene>))
132
+ * ((<get_enzymes_by_compound>)),
133
+ ((<get_enzymes_by_glycan>)),
134
+ ((<get_enzymes_by_reaction>)),
135
+ ((<get_compounds_by_enzyme>)),
136
+ ((<get_compounds_by_reaction>)),
137
+ ((<get_glycans_by_enzyme>)),
138
+ ((<get_glycans_by_reaction>)),
139
+ ((<get_reactions_by_enzyme>)),
140
+ ((<get_reactions_by_compound>)),
141
+ ((<get_reactions_by_glycan>))
142
+ * ((<GENES>))
143
+ * ((<get_genes_by_organism>))
144
+ * ((<GENOME>))
145
+ * ((<get_number_of_genes_by_organism>))
146
+ * ((<LIGAND>))
147
+ * ((<convert_mol_to_kcf>))
148
+
149
+ == Introduction
150
+
151
+ This guide explains how to use the KEGG API in your programs for
152
+ searching and retrieving data from the KEGG database.
153
+
154
+ == KEGG API Quick Start
155
+
156
+ As always, the best way to become familiar with it is by looking at an
157
+ example. In this document, sample codes written in several languages
158
+ are shown. After understanding the first example, try other APIs.
159
+
160
+ Firstly, you have to install the SOAP related libraries for the
161
+ programming language of your choice.
162
+
163
+
164
+ === Quick Start with Perl
165
+
166
+ In the case of Perl, you need to install the following packages:
167
+
168
+ * ((<SOAP Lite|URL:http://soaplite.com/>))
169
+ * ((<MIME-Base64|URL:http://search.cpan.org/author/GAAS/MIME-Base64/>))
170
+ * ((<LWP|URL:http://search.cpan.org/author/GAAS/libwww-perl/>))
171
+ * ((<URI|URL:http://search.cpan.org/author/GAAS/URI/>))
172
+
173
+ Here's a first example in the Perl language.
174
+
175
+ #!/usr/bin/env perl
176
+
177
+ use SOAP::Lite;
178
+
179
+ $wsdl = 'http://soap.genome.jp/KEGG.wsdl';
180
+
181
+ $serv = SOAP::Lite->service($wsdl);
182
+
183
+ $start = 1;
184
+ $max_results = 5;
185
+
186
+ $top5 = $serv->get_best_neighbors_by_gene('eco:b0002', $start, $max_results);
187
+
188
+ foreach $hit (@{$top5}) {
189
+ print "$hit->{genes_id1}\t$hit->{genes_id2}\t$hit->{sw_score}\n";
190
+ }
191
+
192
+ The output will be
193
+
194
+ eco:b0002 eco:b0002 5283
195
+ eco:b0002 ecj:JW0001 5283
196
+ eco:b0002 sfx:S0002 5271
197
+ eco:b0002 sfl:SF0002 5271
198
+ eco:b0002 ecc:c0003 5269
199
+
200
+ showing that eco:b0002 has Smith-Waterman score 5271 with sfl:SF0002
201
+ as a 4th hit among the entire KEGG/GENES database (here, "eco" means
202
+ E. coli K-12 MG1655 and "sfl" means Shigella flexneri 2457T in the
203
+ KEGG organism codes).
204
+
205
+ The method internally searches the KEGG/SSDB (Sequence Similarity
206
+ Database) database which contains information about the amino acid
207
+ sequence similarities among all protein coding genes in the complete
208
+ genomes, together with information about best hits and bidirectional
209
+ best hits (best-best hits). The relation of gene x in genome A and
210
+ gene y in genome B is called bidirectional best hits, when x is the
211
+ best hit of query y against all genes in A and vice versa, and it is
212
+ often used as an operational definition of ortholog.
213
+
214
+ The next example simply lists PATHWAYs for E. coli ("eco") in KEGG
215
+ database.
216
+
217
+ #!/usr/bin/env perl
218
+
219
+ use SOAP::Lite;
220
+
221
+ $wsdl = 'http://soap.genome.jp/KEGG.wsdl';
222
+
223
+ $results = SOAP::Lite
224
+ -> service($wsdl)
225
+ -> list_pathways("eco");
226
+
227
+ foreach $path (@{$results}) {
228
+ print "$path->{entry_id}\t$path->{definition}\n";
229
+ }
230
+
231
+ This example colors the boxes corresponding to the E. coli genes b1002
232
+ and b2388 on a Glycolysis pathway of E. coli (path:eco00010).
233
+
234
+ #!/usr/bin/env perl
235
+
236
+ use SOAP::Lite;
237
+
238
+ $wsdl = 'http://soap.genome.jp/KEGG.wsdl';
239
+
240
+ $serv = SOAP::Lite -> service($wsdl);
241
+
242
+ $genes = SOAP::Data->type(array => ["eco:b1002", "eco:b2388"]);
243
+
244
+ $result = $serv -> mark_pathway_by_objects("path:eco00010", $genes);
245
+
246
+ print $result; # URL of the generated image
247
+
248
+ ==== Perl FAQ
249
+
250
+ As you see in the above example, you always need to convert a Perl array
251
+ into a SOAP object explicitly in SOAP::Lite by
252
+
253
+ SOAP::Data->type(array => [value1, value2, .. ])
254
+
255
+ when you pass an array as the argument for any KEGG API method.
256
+
257
+ === Quick Start with Ruby
258
+
259
+ If you are using Ruby 1.8.1 or later, you are ready to use KEGG API
260
+ as Ruby already supports SOAP in its standard library.
261
+
262
+ If your Ruby is 1.6.8 or older, you need to install the following:
263
+
264
+ * ((<SOAP4R|URL:http://raa.ruby-lang.org/list.rhtml?name=soap4r>)) 1.5.1 or later
265
+ * One of the following XML processing libraries
266
+ * ((<rexml|URL:http://raa.ruby-lang.org/list.rhtml?name=rexml>))
267
+ * ((<xmlparser|URL:http://raa.ruby-lang.org/list.rhtml?name=xmlparser>))
268
+ * ((<xmlscan|URL:http://raa.ruby-lang.org/list.rhtml?name=xmlscan>))
269
+ * ((<date2|URL:http://raa.ruby-lang.org/list.rhtml?name=date2>))
270
+ * ((<devel-logger|URL:http://raa.ruby-lang.org/list.rhtml?name=devel-logger>))
271
+ * ((<uconv|URL:http://raa.ruby-lang.org/list.rhtml?name=uconv>))
272
+ * ((<http-access2|URL:http://raa.ruby-lang.org/list.rhtml?name=http-access2>))
273
+
274
+ Here's a sample code for Ruby having the same functionality as Perl's
275
+ first example shown above.
276
+
277
+ #!/usr/bin/env ruby
278
+
279
+ require 'soap/wsdlDriver'
280
+
281
+ wsdl = "http://soap.genome.jp/KEGG.wsdl"
282
+ serv = SOAP::WSDLDriverFactory.new(wsdl).createDriver
283
+ serv.generate_explicit_type = true
284
+ # if uncommented, you can see transactions for debug
285
+ #serv.wiredump_dev = STDERR
286
+
287
+ start = 1
288
+ max_results = 5
289
+
290
+ top5 = serv.get_best_neighbors_by_gene('eco:b0002', start, max_results)
291
+ top5.each do |hit|
292
+ print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n"
293
+ end
294
+
295
+ You may need to iterate to obtain all the results by increasing start
296
+ and/or max_results.
297
+
298
+ #!/usr/bin/env ruby
299
+
300
+ require 'soap/wsdlDriver'
301
+
302
+ wsdl = "http://soap.genome.jp/KEGG.wsdl"
303
+ serv = SOAP::WSDLDriverFactory.new(wsdl).create_driver
304
+ serv.generate_explicit_type = true
305
+
306
+ start = 1
307
+ max_results = 100
308
+
309
+ loop do
310
+ results = serv.get_best_neighbors_by_gene('eco:b0002', start, max_results)
311
+ break unless results
312
+ results.each do |hit|
313
+ print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n"
314
+ end
315
+ start += max_results
316
+ end
317
+
318
+ It is automatically done by using ((<BioRuby|URL:http://bioruby.org/>))
319
+ library, which implements get_all_* methods for this. BioRuby also
320
+ provides filtering functionality for selecting needed fields from the
321
+ complex data type.
322
+
323
+ #!/usr/bin/env ruby
324
+
325
+ require 'bio'
326
+
327
+ serv = Bio::KEGG::API.new
328
+
329
+ results = serv.get_all_best_neighbors_by_gene('eco:b0002')
330
+
331
+ results.each do |hit|
332
+ print hit.genes_id1, "\t", hit.genes_id2, "\t", hit.sw_score, "\n"
333
+ end
334
+
335
+ # Same as above but using filter to select fields
336
+ fields = [:genes_id1, :genes_id2, :sw_score]
337
+ results.each do |hit|
338
+ puts hit.filter(fields).join("\t")
339
+ end
340
+
341
+ # Different filters to pick additional fields for each amino acid sequence
342
+ fields1 = [:genes_id1, :start_position1, :end_position1, :best_flag_1to2]
343
+ fields2 = [:genes_id2, :start_position2, :end_position2, :best_flag_2to1]
344
+ results.each do |hit|
345
+ print "> score: ", hit.sw_score, ", identity: ", hit.identity, "\n"
346
+ print "1:\t", hit.filter(fields1).join("\t"), "\n"
347
+ print "2:\t", hit.filter(fields2).join("\t"), "\n"
348
+ end
349
+
350
+ The equivalent for the Perl's second example described above will be
351
+
352
+ #!/usr/bin/env ruby
353
+
354
+ require 'bio'
355
+
356
+ serv = Bio::KEGG::API.new
357
+
358
+ list = serv.list_pathways("eco")
359
+ list.each do |path|
360
+ print path.entry_id, "\t", path.definition, "\n"
361
+ end
362
+
363
+ and equivalent for the last example is as follows.
364
+
365
+ #!/usr/bin/env ruby
366
+
367
+ require 'bio'
368
+
369
+ serv = Bio::KEGG::API.new
370
+
371
+ genes = ["eco:b1002", "eco:b2388"]
372
+
373
+ result = serv.mark_pathway_by_objects("path:eco00010", genes)
374
+
375
+ print result # URL of the generated image
376
+
377
+
378
+ === Quick Start with Python
379
+
380
+ In the case of Python, you have to install
381
+
382
+ * ((<SOAPpy|URL:http://pywebsvcs.sourceforge.net/>))
383
+
384
+ plus some extra packages required for SOAPpy (
385
+ ((<fpconst|URL:http://www.analytics.washington.edu/Zope/projects/fpconst>)),
386
+ ((<PyXML|URL:http://pyxml.sourceforge.net/>)) etc.).
387
+
388
+ Here's a sample code using KEGG API with Python.
389
+
390
+ #!/usr/bin/env python
391
+
392
+ from SOAPpy import WSDL
393
+
394
+ wsdl = 'http://soap.genome.jp/KEGG.wsdl'
395
+ serv = WSDL.Proxy(wsdl)
396
+
397
+ results = serv.get_genes_by_pathway('path:eco00020')
398
+ print results
399
+
400
+
401
+ === Quick Start with Java
402
+
403
+ In the case of Java, you need to obtain Apache Axis library version
404
+ axis-1_2alpha or newer (axis-1_1 doesn't work properly for KEGG API)
405
+
406
+ * ((<Apache Axis|URL:http://ws.apache.org/axis/>))
407
+
408
+ and put required jar files in an appropriate directory.
409
+
410
+ For the binary distribution of the Apache axis-1_2alpha release, copy
411
+ the jar files stored under the axis-1_2alpha/lib/ to the directory of
412
+ your choice.
413
+
414
+ % cp axis-1_2alpha/lib/*.jar /path/to/lib/
415
+
416
+ You can use WSDL2Java coming with Apache Axis to generate classes
417
+ needed for the KEGG API automatically.
418
+
419
+ To generate classes and documents for the KEGG API, download the script
420
+ ((<axisfix.pl|URL:http://www.genome.jp/kegg/soap/support/axisfix.pl>))
421
+ and follow the steps below:
422
+
423
+ % java -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/commons-logging.jar:/path/to/lib/commons-discovery.jar:/path/to/lib/saaj.jar:/path/to/lib/wsdl4j.jar:. org.apache.axis.wsdl.WSDL2Java -p keggapi http://soap.genome.jp/KEGG.wsdl
424
+ % perl -i axisfix.pl keggapi/KEGGBindingStub.java
425
+ % javac -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/wsdl4j.jar:. keggapi/KEGGLocator.java
426
+ % jar cvf keggapi.jar keggapi/*
427
+ % javadoc -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar -d keggapi_javadoc keggapi/*.java
428
+
429
+ This program will do the same job as the Python's example (extended to
430
+ accept a pathway_id as the argument).
431
+
432
+ import keggapi.*;
433
+
434
+ class GetGenesByPathway {
435
+ public static void main(String[] args) throws Exception {
436
+ KEGGLocator locator = new KEGGLocator();
437
+ KEGGPortType serv = locator.getKEGGPort();
438
+
439
+ String query = args[0];
440
+ String[] results = serv.get_genes_by_pathway(query);
441
+
442
+ for (int i = 0; i < results.length; i++) {
443
+ System.out.println(results[i]);
444
+ }
445
+ }
446
+ }
447
+
448
+ This is another example which uses ArrayOfSSDBRelation data type.
449
+
450
+ import keggapi.*;
451
+
452
+ class GetBestNeighborsByGene {
453
+ public static void main(String[] args) throws Exception {
454
+ KEGGLocator locator = new KEGGLocator();
455
+ KEGGPortType serv = locator.getKEGGPort();
456
+
457
+ String query = args[0];
458
+ SSDBRelation[] results = null;
459
+
460
+ results = serv.get_best_neighbors_by_gene(query, 1, 50);
461
+
462
+ for (int i = 0; i < results.length; i++) {
463
+ String gene1 = results[i].getGenes_id1();
464
+ String gene2 = results[i].getGenes_id2();
465
+ int score = results[i].getSw_score();
466
+ System.out.println(gene1 + "\t" + gene2 + "\t" + score);
467
+ }
468
+ }
469
+ }
470
+
471
+ Compile and execute this program (don't forget to include keggapi.jar file
472
+ in your classpath) as follows:
473
+
474
+ % javac -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/wsdl4j.jar:/path/to/keggapi.jar GetBestNeighborsByGene.java
475
+
476
+ % java -classpath /path/to/lib/axis.jar:/path/to/lib/jaxrpc.jar:/path/to/lib/commons-logging.jar:/path/to/lib/commons-discovery.jar:/path/to/lib/saaj.jar:/path/to/lib/wsdl4j.jar:/path/to/keggapi.jar:. GetBestNeighborsByGene eco:b0002
477
+
478
+ You may wish to set the CLASSPATH environmental variable.
479
+
480
+ bash/zsh:
481
+
482
+ % for i in /path/to/lib/*.jar
483
+ do
484
+ CLASSPATH="${CLASSPATH}:${i}"
485
+ done
486
+ % export CLASSPATH
487
+
488
+ tcsh:
489
+
490
+ % foreach i ( /path/to/lib/*.jar )
491
+ setenv CLASSPATH ${CLASSPATH}:${i}
492
+ end
493
+
494
+ For the other cases, consult the javadoc pages generated by WSDL2Java.
495
+
496
+ * ((<URL:http://www.genome.jp/kegg/soap/doc/keggapi_javadoc/>))
497
+
498
+
499
+ == KEGG API Reference
500
+
501
+ === WSDL file
502
+
503
+ Users can use a WSDL file to create a SOAP client driver. The WSDL file for
504
+ the KEGG API can be found at:
505
+
506
+ * ((<URL:http://soap.genome.jp/KEGG.wsdl>))
507
+
508
+ === Terminology
509
+
510
+ * 'org' is a three-letter organism code used in KEGG. The list can be
511
+ found at (see the description of the list_organisms method below):
512
+
513
+ * ((<URL:http://www.genome.jp/kegg/catalog/org_list.html>))
514
+
515
+ * 'db' is a database name used in GenomeNet service. See the
516
+ description of the list_databases method below.
517
+
518
+ * 'entry_id' is a unique identifier of which format is the combination of
519
+ the database name and the identifier of an entry joined by a colon sign
520
+ as 'database:entry' (e.g. 'embl:J00231' means an EMBL entry 'J00231').
521
+ 'entry_id' includes 'genes_id', 'enzyme_id', 'compound_id', 'glycan_id',
522
+ 'reaction_id', 'pathway_id' and 'motif_id' described below.
523
+
524
+ * 'genes_id' is a gene identifier used in KEGG/GENES which consists of
525
+ 'keggorg' and a gene name (e.g. 'eco:b0001' means an E. coli gene 'b0001').
526
+
527
+ * 'enzyme_id' is an enzyme identifier consisting of database name 'ec'
528
+ and an enzyme code used in KEGG/LIGAND (e.g. 'ec:1.1.1.1' means an
529
+ alcohol dehydrogenase enzyme)
530
+
531
+ * 'compound_id' is a compound identifier consisting of database name 'cpd'
532
+ and a compound number used in KEGG/LIGAND (e.g. 'cpd:C00158' means a
533
+ citric acid). Note that some compounds also have 'glycan_id' and
534
+ both IDs are accepted and converted internally by the corresponding
535
+ methods.
536
+
537
+ * 'glycan_id' is a glycan identifier consisting of database name 'gl'
538
+ and a glycan number used in KEGG/GLYCAN (e.g. 'gl:G00050' means a
539
+ Paragloboside). Note that some glycans also have 'compound_id' and
540
+ both IDs are accepted and converted internally by the corresponding
541
+ methods.
542
+
543
+ * 'reaction_id' is a reaction identifier consisting of database name 'rn'
544
+ and a reaction number used in KEGG/REACTION (e.g. 'rn:R00959' is a
545
+ reaction which converts cpd:C00103 into cpd:C00668)
546
+
547
+ * 'pathway_id' is a pathway identifier consisting of 'path' and a pathway
548
+ number used in KEGG/PATHWAY. Pathway numbers prefixed by 'map' specify
549
+ the reference pathway and pathways prefixed by the 'keggorg' specify
550
+ pathways specific to the organism (e.g. 'path:map00020' means a reference
551
+ pathway for the citrate cycle and 'path:eco00020' means the same pathway of
552
+ which E. coli genes are marked).
553
+
554
+ * 'motif_id' is a motif identifier consisting of motif database names
555
+ ('ps' for prosite, 'bl' for blocks, 'pr' for prints, 'pd' for prodom,
556
+ and 'pf' for pfam) and a motif entry name. (e.g. 'pf:DnaJ' means a Pfam
557
+ database entry 'DnaJ').
558
+
559
+ * 'ko_id' is a KO identifier consisting of 'ko' and a ko number used in
560
+ KEGG/KO. KO (KEGG Orthology) is a classification of orthologous genes
561
+ defined by KEGG (e.g. 'ko:K02598' means a KO group for nitrite transporter
562
+ NirC genes).
563
+
564
+ * 'ko_class_id' is a KO class identifier which is used to classify
565
+ 'ko_id' hierarchically (e.g. '01110' means a 'Carbohydrate Metabolism'
566
+ class).
567
+
568
+ * ((<URL:http://www.genome.jp/dbget-bin/get_htext?KO>))
569
+
570
+ * 'start' and 'max_results' are both integers and are used to control the
571
+ number of the results returned at once. Methods having these arguments
572
+ will return the first 'max_results' results starting from the 'start'th.
573
+
574
+ * 'fg_color_list' is a list of colors for the foreground (corresponding
575
+ to the texts and borders of the objects on the KEGG pathway map).
576
+
577
+ * 'bg_color_list' is a list of colors for the background (corresponding
578
+ to the inside of the objects on the KEGG pathway map).
579
+
580
+ === Returned values
581
+
582
+ Many of the KEGG API methods will return a set of values in a complex data
583
+ structure as described below. This section summarizes all kinds of these
584
+ data types. Note that the returned values for an empty result will be
585
+ * an empty array -- for the methods which return ArrayOf'OBJ'
586
+ * an empty string -- for the methods which return String
587
+ * -1 -- for the methods which return int
588
+ * NULL -- for the methods which return any other 'OBJ'
589
+
590
+ + SSDBRelation
591
+
592
+ SSDBRelation data type contains the following fields:
593
+
594
+ genes_id1 genes_id of the query (string)
595
+ genes_id2 genes_id of the target (string)
596
+ sw_score Smith-Waterman score between genes_id1 and genes_id2 (int)
597
+ bit_score bit score between genes_id1 and genes_id2 (float)
598
+ identity identity between genes_id1 and genes_id2 (float)
599
+ overlap overlap length between genes_id1 and genes_id2 (int)
600
+ start_position1 start position of the alignment in genes_id1 (int)
601
+ end_position1 end position of the alignment in genes_id1 (int)
602
+ start_position2 start position of the alignment in genes_id2 (int)
603
+ end_position2 end position of the alignment in genes_id2 (int)
604
+ best_flag_1to2 best flag from genes_id1 to genes_id2 (boolean)
605
+ best_flag_2to1 best flag from genes_id2 to genes_id1 (boolean)
606
+ definition1 definition string of the genes_id1 (string)
607
+ definition2 definition string of the genes_id2 (string)
608
+ length1 amino acid length of the genes_id1 (int)
609
+ length2 amino acid length of the genes_id2 (int)
610
+
611
+ Notice (26 Nov, 2004):
612
+
613
+ We found a serious bug with the 'best_flag_1to2' and 'best_flag_2to1'
614
+ fields in the SSDBRelation data type. The methods returning the
615
+ SSDBRelation (and ArrayOfSSDBRelation) data type had returned the
616
+ opposite values of the intended results in both fields.
617
+ The following methods had been affected by this bug:
618
+
619
+ # * get_neighbors_by_gene
620
+ * get_best_neighbors_by_gene
621
+ * get_reverse_best_neighbors_by_gene
622
+ * get_paralogs_by_gene
623
+ # * get_similarity_between_genes
624
+
625
+ This problem is fixed in the version 3.2.
626
+
627
+ + ArrayOfSSDBRelation
628
+
629
+ ArrayOfSSDBRelation data type is a list of the SSDBRelation data type.
630
+
631
+ + MotifResult
632
+
633
+ MotifResult data type contains the following fields:
634
+
635
+ motif_id motif_id of the motif (string)
636
+ definition definition of the motif (string)
637
+ genes_id genes_id of the gene containing the motif (string)
638
+ start_position start position of the motif match (int)
639
+ end_position end position of the motif match (int)
640
+ score score of the motif match for TIGRFAM and PROSITE (float)
641
+ evalue E-value of the motif match for Pfam (double)
642
+
643
+ Note: 'score' and/or 'evalue' is set to -1 if the corresponding value is
644
+ not applicable.
645
+
646
+ + ArrayOfMotifResult
647
+
648
+ ArrayOfMotifResult data type is a list of the MotifResult data type.
649
+
650
+ + Definition
651
+
652
+ Definition data type contains the following fields:
653
+
654
+ entry_id database entry_id (string)
655
+ definition definition of the entry (string)
656
+
657
+ + ArrayOfDefinition
658
+
659
+ ArrayOfDefinition data type is a list of the Definition data type.
660
+
661
+ + LinkDBRelation
662
+
663
+ LinkDBRelation data type contains the following fields:
664
+
665
+ entry_id1 entry_id of the starting entry (string)
666
+ entry_id2 entry_id of the terminal entry (string)
667
+ type type of the link as "direct" or "indirect" (string)
668
+ path link path information across the databases (string)
669
+
670
+ + ArrayOfLinkDBRelation
671
+
672
+ ArrayOfLinkDBRelation data type is a list of the LinkDBRelation data type.
673
+
674
+ === Methods
675
+
676
+ ==== Meta information
677
+
678
+ This section describes the APIs for retrieving the general information
679
+ concerning the latest version of the KEGG database.
680
+
681
+ --- list_databases
682
+
683
+ List of database names and their definitions available on the GenomeNet
684
+ is returned.
685
+
686
+ Return value:
687
+ ArrayOfDefinition (db, definition)
688
+
689
+ --- list_organisms
690
+
691
+ List up the organisms in the KEGG/GENES database. 'org' code and the
692
+ organism's full name are returned in the Definition data type.
693
+
694
+ Return value:
695
+ ArrayOfDefinition (org, definition)
696
+
697
+ --- list_pathways(org)
698
+
699
+ List up the pathway maps of the given organism in the KEGG/PATHWAY database.
700
+ Passing the string "map" as its argument, this method returns a list of the
701
+ reference pathways.
702
+
703
+ Return value:
704
+ ArrayOfDefinition (pathway_id, definition)
705
+
706
+ ==== DBGET
707
+
708
+ This section describes the wrapper methods for DBGET system developed
709
+ at the GenomeNet. For more details on DBGET system, see:
710
+
711
+ * ((<URL:http://www.genome.jp/dbget/dbget_manual.html>))
712
+
713
+ --- binfo(string)
714
+
715
+ Show the version information of the specified database.
716
+ Passing the string "all" as its argument, this method returns the version
717
+ information of all databases available on the GenomeNet.
718
+
719
+ Return value:
720
+ string
721
+
722
+ Example:
723
+ # Show the information of the latest GenBank database.
724
+ binfo("gb")
725
+
726
+ --- bfind(string)
727
+
728
+ Wrapper method for bfind command. bfind is used for searching entries by
729
+ keywords. Users need to specify a database from those which are supported
730
+ by DBGET system before keywords. Number of keywords given at a time is
731
+ restricted up to 100.
732
+
733
+ Return value:
734
+ string
735
+
736
+ Example:
737
+ # Returns the IDs and definitions of entries which have definition
738
+ # including the word 'E-cadherin' and 'human' from GenBank.
739
+ bfind("gb E-cadherin human")
740
+
741
+ --- bget(string)
742
+
743
+ The bget command is used for retrieving database entries specified by a list
744
+ of 'entry_id'. This method accepts all the bget command line options as
745
+ a string. Number of entries retrieved at a time is restricted up to 100.
746
+
747
+ Return value:
748
+ string
749
+
750
+ Example:
751
+ # retrieve two KEGG/GENES entries
752
+ bget("eco:b0002 hin:tRNA-Cys-1")
753
+ # retrieve nucleic acid sequences in a FASTA format
754
+ bget("-f -n n eco:b0002 hin:tRNA-Cys-1")
755
+ # retrieve amino acid sequence in a FASTA format
756
+ bget("-f -n a eco:b0002")
757
+
758
+ --- btit(string)
759
+
760
+ Wrapper method for btit command. btit is used for retrieving the definitions
761
+ by given database entries. Number of entries given at a time is restricted
762
+ up to 100.
763
+
764
+ Return value:
765
+ string
766
+
767
+ Example:
768
+ # Returns the ids and definitions of four GENES entries "hsa:1798",
769
+ # "mmu:13478", "dme:CG5287-PA" and "cel:Y60A3A.14".
770
+ btit("hsa:1798 mmu:13478 dme:CG5287-PA cel:Y60A3A.14")
771
+
772
+ ==== LinkDB
773
+
774
+ --- get_linkdb_by_entry(entry_id, db, start, max_results)
775
+
776
+ Retrieve the database entries linked from the user specified database entry.
777
+ The target database can also be specified.
778
+
779
+ Return value:
780
+ ArrayOfLinkDBRelation
781
+
782
+ Example:
783
+ # Get the entries of KEGG/PATHWAY database linked from the entry 'eco:b0002'.
784
+ get_linkdb_by_entry('eco:b0002', 'pathway', 1, 10)
785
+ get_linkdb_by_entry('eco:b0002', 'pathway', 11, 10)
786
+
787
+ ==== SSDB
788
+
789
+ This section describes the APIs for SSDB database. For more details
790
+ on SSDB, see:
791
+
792
+ * ((<URL:http://www.genome.jp/kegg/ssdb/>))
793
+
794
+ #--- get_neighbors_by_gene(genes_id, org, start, max_results)
795
+ #
796
+ #Search homologous genes of the user specified 'genes_id' from specified
797
+ #organism (or from all organisms if 'all' is given as org).
798
+ #
799
+ #Return value:
800
+ # ArrayOfSSDBRelation
801
+ #
802
+ #Examples:
803
+ # # This will search all homologous genes of E. coli gene 'b0002'
804
+ # # in the SSDB and returns the first ten results.
805
+ # get_neighbors_by_gene('eco:b0002', 'all', 1, 10)
806
+ # # Next ten results.
807
+ # get_neighbors_by_gene('eco:b0002', 'all', 11, 10)
808
+
809
+ --- get_best_best_neighbors_by_gene(genes_id, start, max_results)
810
+
811
+ Search best-best neighbor of the gene in all organisms.
812
+
813
+ Return value:
814
+ ArrayOfSSDBRelation
815
+
816
+ Example:
817
+ # List up best-best neighbors of 'eco:b0002'.
818
+ get_best_best_neighbors_by_gene('eco:b0002', 1, 10)
819
+ get_best_best_neighbors_by_gene('eco:b0002', 11, 10)
820
+
821
+ --- get_best_neighbors_by_gene(genes_id, start, max_results)
822
+
823
+ Search best neighbors in all organisms.
824
+
825
+ Return value:
826
+ ArrayOfSSDBRelation
827
+
828
+ Example:
829
+ # List up best neighbors of 'eco:b0002'.
830
+ get_best_neighbors_by_gene('eco:b0002', 1, 10)
831
+ get_best_neighbors_by_gene('eco:b0002', 11, 10)
832
+
833
+ --- get_reverse_best_neighbors_by_gene(genes_id, start, max_results)
834
+
835
+ Search reverse best neighbors in all organisms.
836
+
837
+ Return value:
838
+ ArrayOfSSDBRelation
839
+
840
+ Example:
841
+ # List up reverse best neighbors of 'eco:b0002'.
842
+ get_reverse_best_neighbors_by_gene('eco:b0002', 1, 10)
843
+ get_reverse_best_neighbors_by_gene('eco:b0002', 11, 10)
844
+
845
+ --- get_paralogs_by_gene(genes_id, start, max_results)
846
+
847
+ Search paralogous genes of the given gene in the same organism.
848
+
849
+ Return value:
850
+ ArrayOfSSDBRelation
851
+
852
+ Example:
853
+ # List up paralogous genes of 'eco:b0002'.
854
+ get_paralogs_by_gene('eco:b0002', 1, 10)
855
+ get_paralogs_by_gene('eco:b0002', 11, 10)
856
+
857
+ #--- get_similarity_between_genes(genes_id1, genes_id2)
858
+ #
859
+ #Returns data containing Smith-Waterman score and alignment positions
860
+ #between the two genes.
861
+ #
862
+ #Return value:
863
+ # SSDBRelation
864
+ #
865
+ #Example:
866
+ # # Returns a 'sw_score' between two E. coli genes 'b0002' and 'b3940'
867
+ # get_similarity_between_genes('eco:b0002', 'eco:b3940')
868
+
869
+ ==== Motif
870
+
871
+ --- get_motifs_by_gene(genes_id, db)
872
+
873
+ Search motifs in the specified gene. As for 'db',
874
+ user can specify one of the four databases: Pfam, TIGRFAM, PROSITE pattern,
875
+ PROSITE profile as 'pfam', 'tfam', 'pspt', 'pspf', respectively.
876
+ You can also use 'all' to specify all of the four databases above.
877
+
878
+ Return value:
879
+ ArrayOfMotifResult
880
+
881
+ Example:
882
+ # Returns the all pfam motifs in the E. coli gene 'b0002'
883
+ get_motifs_by_gene('eco:b0002', 'pfam')
884
+
885
+ --- get_genes_by_motifs(motif_id_list, start, max_results)
886
+
887
+ Search all genes which contain all of the specified motifs.
888
+
889
+ Return value:
890
+ ArrayOfDefinition (genes_id, definition)
891
+
892
+ Example:
893
+ # Returns all genes which have Pfam 'DnaJ' and Prosite 'DNAJ_2' motifs.
894
+ list = ['pf:DnaJ', 'ps:DNAJ_2']
895
+ get_genes_by_motifs(list, 1, 10)
896
+ get_genes_by_motifs(list, 11, 10)
897
+
898
+
899
+ ==== KO, OC, PC
900
+
901
+ --- get_ko_by_gene(genes_id)
902
+
903
+ Search all KOs to which given genes_id belongs.
904
+
905
+ Return value:
906
+ ArrayOfstring (ko_id)
907
+
908
+ Example:
909
+ # Returns ko_ids to which GENES entry 'eco:b0002' belongs.
910
+ get_ko_by_gene('eco:b0002')
911
+
912
+ #--- get_ko_members(ko_id)
913
+ #
914
+ #Returns all genes assigned to the given KO entry.
915
+ #
916
+ #Return value:
917
+ # ArrayOfstring (genes_id)
918
+ #
919
+ #Example
920
+ # # Returns genes_ids those which belong to KO entry 'ko:K02598'.
921
+ # get_ko_members('ko:K02598')
922
+
923
+ --- get_ko_by_ko_class(ko_class_id)
924
+
925
+ Return all KOs which belong to the given ko_class_id.
926
+
927
+ Return value:
928
+ ArrayOfDefinition (ko_id, definition)
929
+
930
+ Example:
931
+ # Returns ko_ids which belong to the KO class '01196'.
932
+ get_ko_by_ko_class('01196')
933
+
934
+ --- get_genes_by_ko_class(ko_class_id, org, start, max_results)
935
+
936
+ Retrieve all genes of the specified organism which are classified
937
+ under the given ko_class_id.
938
+
939
+ Return value:
940
+ ArrayOfDefinition (genes_id, definition)
941
+
942
+ Example:
943
+ # Returns the first 100 human genes which belong to the KO class '00903'
944
+ get_genes_by_ko_class('00903', 'hsa' , 1, 100)
945
+
946
+ --- get_genes_by_ko(ko_id, org)
947
+
948
+ Retrieve all genes of the specified organism which belong to the
949
+ given ko_id.
950
+
951
+ Return value:
952
+ ArrayOfDefinition (genes_id, definition)
953
+
954
+ Example:
955
+ # Returns E.coli genes which belong to the KO 'K00001'
956
+ get_genes_by_ko('ko:K00001', 'eco')
957
+
958
+ # Returns genes of all organisms which are assigned to the KO 'K00010'
959
+ get_genes_by_ko('ko:K00010', 'all')
960
+
961
+ --- get_oc_members_by_gene(genes_id, start, max_results)
962
+
963
+ Search all members of the same OC (KEGG Ortholog Cluster) to which given
964
+ genes_id belongs.
965
+
966
+ Return value:
967
+ ArrayOfstring (genes_id)
968
+
969
+ Example:
970
+ # Returns genes belonging to the same OC with eco:b0002 gene.
971
+ get_oc_members_by_gene('eco:b0002', 1, 10)
972
+ get_oc_members_by_gene('eco:b0002', 11, 10)
973
+
974
+ --- get_pc_members_by_gene(genes_id, start, max_results)
975
+
976
+ Search all members of the same PC (KEGG Paralog Cluster) to which given
977
+ genes_id belongs.
978
+
979
+ Return value:
980
+ ArrayOfstring (genes_id)
981
+
982
+ Example:
983
+ # Returns genes belonging to the same PC with eco:b0002 gene.
984
+ get_pc_members_by_gene('eco:b0002', 1, 10)
985
+ get_pc_members_by_gene('eco:b0002', 11, 10)
986
+
987
+
988
+ ==== PATHWAY
989
+
990
+ This section describes the APIs for PATHWAY database. For more details
991
+ on PATHWAY database, see:
992
+
993
+ * ((<URL:http://www.genome.jp/kegg/kegg2.html#pathway>))
994
+
995
+ + Coloring pathways
996
+
997
+ --- mark_pathway_by_objects(pathway_id, object_id_list)
998
+
999
+ Mark the given objects on the given pathway map and return the URL of the
1000
+ generated image.
1001
+
1002
+ Return value:
1003
+ string (URL)
1004
+
1005
+ Example:
1006
+ # Returns the URL of the generated image for the given map 'path:eco00260'
1007
+ # with objects corresponding to 'eco:b0002' and 'cpd:C00263' colored in red.
1008
+ obj_list = ['eco:b0002', 'cpd:C00263']
1009
+ mark_pathway_by_objects('path:eco00260', obj_list)
1010
+
1011
+ --- color_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
1012
+
1013
+ Color the given objects on the pathway map with the specified colors
1014
+ and return the URL of the colored image. In the KEGG pathway maps,
1015
+ a gene or enzyme is represented by a rectangle and a compound is
1016
+ shown as a small circle. 'fg_color_list' is used for specifying the
1017
+ color of text and border of the given objects and 'bg_color_list' is
1018
+ used for its background area. The order of colors in these lists
1019
+ corresponds to the order of objects in the 'object_id_list' list.
1020
+
1021
+ Return value:
1022
+ string (URL)
1023
+
1024
+ Example:
1025
+ # Returns the URL for the given pathway 'path:eco00260' with genes
1026
+ # 'eco:b0514' colored in red with yellow background and
1027
+ # 'eco:b2913' colored in green with yellow background.
1028
+ obj_list = ['eco:b0514', 'eco:b2913']
1029
+ fg_list = ['#ff0000', '#00ff00']
1030
+ bg_list = ['#ffff00', 'yellow']
1031
+ color_pathway_by_objects('path:eco00260', obj_list, fg_list, bg_list)
1032
+
1033
+ --- get_html_of_marked_pathway_by_objects(pathway_id, object_id_list)
1034
+
1035
+ HTML version of the 'mark_pathway_by_objects' method.
1036
+ Mark the given objects on the given pathway map and return the URL of the
1037
+ HTML with the generated image as a clickable map.
1038
+
1039
+ Return value:
1040
+ string (URL)
1041
+
1042
+ Example:
1043
+ # Returns the URL of the HTML which can be passed to the web browser
1044
+ # as a clickable map of the generated image of the given pathway
1045
+ # 'path:eco00970' with three objects corresponding to 'eco:b4258',
1046
+ # 'cpd:C00135' and 'ko:K01881' colored in red.
1047
+ obj_list = ['eco:b4258', 'cpd:C00135', 'ko:K01881']
1048
+ get_html_of_marked_pathway_by_objects('path:eco00970', obj_list)
1049
+
1050
+ --- get_html_of_colored_pathway_by_objects(pathway_id, object_id_list, fg_color_list, bg_color_list)
1051
+
1052
+ HTML version of the 'color_pathway_by_objects' method.
1053
+ Color the given objects on the pathway map with the specified colors
1054
+ and return the URL of the HTML containing the colored image as a
1055
+ clickable map.
1056
+
1057
+ Return value:
1058
+ string (URL)
1059
+
1060
+ Example:
1061
+ # Returns the URL of the HTML which can be passed to the web browser
1062
+ # as a clickable map of colored image of the given pathway 'path:eco00970'
1063
+ # with a gene 'eco:b4258' colored in gray/red, a compound 'cpd:C00135'
1064
+ # colored in green/yellow and a KO 'ko:K01881' colored in blue/orange.
1065
+ obj_list = ['eco:b4258', 'cpd:C00135', 'ko:K01881']
1066
+ fg_list = ['gray', '#00ff00', 'blue']
1067
+ bg_list = ['#ff0000', 'yellow', 'orange']
1068
+ get_html_of_colored_pathway_by_objects('path:eco00970', obj_list, fg_list, bg_list)
1069
+
1070
+
1071
+ + Objects on the pathway
1072
+
1073
+ --- get_genes_by_pathway(pathway_id)
1074
+
1075
+ Search all genes on the specified pathway. Organism name is given by
1076
+ the name of the pathway map.
1077
+
1078
+ Return value:
1079
+ ArrayOfstring (genes_id)
1080
+
1081
+ Example:
1082
+ # Returns all E. coli genes on the pathway map '00020'.
1083
+ get_genes_by_pathway('path:eco00020')
1084
+
1085
+ --- get_enzymes_by_pathway(pathway_id)
1086
+
1087
+ Search all enzymes on the specified pathway.
1088
+
1089
+ Return value:
1090
+ ArrayOfstring (enzyme_id)
1091
+
1092
+ Example:
1093
+ # Returns all E. coli enzymes on the pathway map '00020'.
1094
+ get_enzymes_by_pathway('path:eco00020')
1095
+
1096
+ --- get_compounds_by_pathway(pathway_id)
1097
+
1098
+ Search all compounds on the specified pathway.
1099
+
1100
+ Return value:
1101
+ ArrayOfstring (compound_id)
1102
+
1103
+ Example:
1104
+ # Returns all E. coli compounds on the pathway map '00020'.
1105
+ get_compounds_by_pathway('path:eco00020')
1106
+
1107
+ --- get_glycans_by_pathway(pathway_id)
1108
+
1109
+ Search all glycans on the specified pathway.
1110
+
1111
+ Return value:
1112
+ ArrayOfstring (glycan_id)
1113
+
1114
+ Example:
1115
+ # Returns all E. coli glycans on the pathway map '00510'
1116
+ get_glycans_by_pathway('path:eco00510')
1117
+
1118
+ --- get_reactions_by_pathway(pathway_id)
1119
+
1120
+ Retrieve all reactions on the specified pathway.
1121
+
1122
+ Return value:
1123
+ ArrayOfstring (reaction_id)
1124
+
1125
+ Example:
1126
+ # Returns all E. coli reactions on the pathway map '00260'
1127
+ get_reactions_by_pathway('path:eco00260')
1128
+
1129
+ --- get_kos_by_pathway(pathway_id)
1130
+
1131
+ Retrieve all KOs on the specified pathway.
1132
+
1133
+ Return value:
1134
+ ArrayOfstring (ko_id)
1135
+
1136
+ Example:
1137
+ # Returns all ko_ids on the pathway map 'path:hsa00010'
1138
+ get_kos_by_pathway('path:hsa00010')
1139
+
1140
+
1141
+ + Pathways by objects
1142
+
1143
+ --- get_pathways_by_genes(genes_id_list)
1144
+
1145
+ Search all pathways which include all the given genes. How to pass the
1146
+ list of genes_id will depend on the language specific implementations.
1147
+
1148
+ Return value:
1149
+ ArrayOfstring (pathway_id)
1150
+
1151
+ Example:
1152
+ # Returns all pathways including E. coli genes 'b0077' and 'b0078'
1153
+ get_pathways_by_genes(['eco:b0077' , 'eco:b0078'])
1154
+
1155
+ --- get_pathways_by_enzymes(enzyme_id_list)
1156
+
1157
+ Search all pathways which include all the given enzymes.
1158
+
1159
+ Return value:
1160
+ ArrayOfstring (pathway_id)
1161
+
1162
+ Example:
1163
+ # Returns all pathways including an enzyme '1.3.99.1'
1164
+ get_pathways_by_enzymes(['ec:1.3.99.1'])
1165
+
1166
+ --- get_pathways_by_compounds(compound_id_list)
1167
+
1168
+ Search all pathways which include all the given compounds.
1169
+
1170
+ Return value:
1171
+ ArrayOfstring (pathway_id)
1172
+
1173
+ Example:
1174
+ # Returns all pathways including compounds 'C00033' and 'C00158'
1175
+ get_pathways_by_compounds(['cpd:C00033', 'cpd:C00158'])
1176
+
1177
+ --- get_pathways_by_glycans(glycan_id_list)
1178
+
1179
+ Search all pathways which include all the given glycans.
1180
+
1181
+ Return value:
1182
+ ArrayOfstring (pathway_id)
1183
+
1184
+ Example:
1185
+ # Returns all pathways including glycans 'G00009' and 'G00011'
1186
+ get_pathways_by_glycans(['gl:G00009', 'gl:G00011'])
1187
+
1188
+ --- get_pathways_by_reactions(reaction_id_list)
1189
+
1190
+ Retrieve all pathways which include all the given reaction_ids.
1191
+
1192
+ Return value:
1193
+ ArrayOfstring (pathway_id)
1194
+
1195
+ Example:
1196
+ # Returns all pathways including reactions 'rn:R00959', 'rn:R02740',
1197
+ # 'rn:R00960' and 'rn:R01786'
1198
+ get_pathways_by_reactions(['rn:R00959', 'rn:R02740', 'rn:R00960', 'rn:R01786'])
1199
+
1200
+ --- get_pathways_by_kos(ko_id_list, org)
1201
+
1202
+ Retrieve all pathways of the organisms which include all the given KO IDs.
1203
+
1204
+ Return value:
1205
+ ArrayOfstring (pathway_id)
1206
+
1207
+ Example:
1208
+ # Returns all human pathways including 'ko:K00016' and 'ko:K00382'
1209
+ get_pathways_by_kos(['ko:K00016', 'ko:K00382'], 'hsa')
1210
+
1211
+ # Returns pathways of all organisms including 'ko:K00016' and 'ko:K00382'
1212
+ get_pathways_by_kos(['ko:K00016', 'ko:K00382'], 'all')
1213
+
1214
+
1215
+ + Relation among pathways
1216
+
1217
+ --- get_linked_pathways(pathway_id)
1218
+
1219
+ Retrieve all pathways which are linked from a given pathway_id.
1220
+
1221
+ Return value:
1222
+ ArrayOfstring (pathway_id)
1223
+
1224
+ Example:
1225
+ # Returns IDs of PATHWAY entries linked from 'path:eco00620'.
1226
+ get_linked_pathways('path:eco00620')
1227
+
1228
+
1229
+ + Relation among genes and enzymes
1230
+
1231
+ --- get_genes_by_enzyme(enzyme_id, org)
1232
+
1233
+ Retrieve all genes of the given organism which are assigned the given enzyme_id.
1234
+
1235
+ Return value:
1236
+ ArrayOfstring (genes_id)
1237
+
1238
+ Example:
1239
+ # Returns all the GENES entry IDs in E.coli genome which are assigned
1240
+ # EC number ec:1.2.1.1
1241
+ get_genes_by_enzyme('ec:1.2.1.1', 'eco')
1242
+
1243
+ --- get_enzymes_by_gene(genes_id)
1244
+
1245
+ Retrieve all the EC numbers which are assigned to the given gene.
1246
+
1247
+ Return value:
1248
+ ArrayOfstring (enzyme_id)
1249
+
1250
+ Example:
1251
+ # Returns the EC numbers which are assigned to the E.coli gene 'b0002'
1252
+ get_enzymes_by_gene('eco:b0002')
1253
+
1254
+
1255
+ + Relation among enzymes, compounds and reactions
1256
+
1257
+ --- get_enzymes_by_compound(compound_id)
1258
+
1259
+ Retrieve all enzymes which have a link to the given compound_id.
1260
+
1261
+ Return value:
1262
+ ArrayOfstring (enzyme_id)
1263
+
1264
+ Example:
1265
+ # Returns the ENZYME entry IDs which have a link to the COMPOUND entry,
1266
+ # 'cpd:C00345'
1267
+ get_enzymes_by_compound('cpd:C00345')
1268
+
1269
+ --- get_enzymes_by_glycan(glycan_id)
1270
+
1271
+ Retrieve all enzymes which have a link to the given glycan_id.
1272
+
1273
+ Return value:
1274
+ ArrayOfstring (enzyme_id)
1275
+
1276
+ Example:
1277
+ # Returns the ENZYME entry IDs which have a link to the GLYCAN entry,
1278
+ # 'gl:G00001'
1279
+ get_enzymes_by_glycan('gl:G00001')
1280
+
1281
+ --- get_enzymes_by_reaction(reaction_id)
1282
+
1283
+ Retrieve all enzymes which have a link to the given reaction_id.
1284
+
1285
+ Return value:
1286
+ ArrayOfstring (enzyme_id)
1287
+
1288
+ Example:
1289
+ # Returns the ENZYME entry IDs which have a link to the REACTION entry,
1290
+ # 'rn:R00100'.
1291
+ get_enzymes_by_reaction('rn:R00100')
1292
+
1293
+ --- get_compounds_by_enzyme(enzyme_id)
1294
+
1295
+ Retrieve all compounds which have a link to the given enzyme_id.
1296
+
1297
+ Return value:
1298
+ ArrayOfstring (compound_id)
1299
+
1300
+ Example:
1301
+ # Returns the COMPOUND entry IDs which have a link to the ENZYME entry,
1302
+ # 'ec:2.7.1.12'.
1303
+ get_compounds_by_enzyme('ec:2.7.1.12')
1304
+
1305
+ --- get_compounds_by_reaction(reaction_id)
1306
+
1307
+ Retrieve all compounds which have a link to the given reaction_id.
1308
+
1309
+ Return value:
1310
+ ArrayOfstring (compound_id)
1311
+
1312
+ Example:
1313
+ # Returns the COMPOUND entry IDs which have a link to the REACTION entry,
1314
+ # 'rn:R00100'
1315
+ get_compounds_by_reaction('rn:R00100')
1316
+
1317
+ --- get_glycans_by_enzyme(enzyme_id)
1318
+
1319
+ Retrieve all glycans which have a link to the given enzyme_id.
1320
+
1321
+ Return value:
1322
+ ArrayOfstring (glycan_id)
1323
+
1324
+ Example:
1325
+ # Returns the GLYCAN entry IDs which have a link to the ENZYME entry,
1326
+ # 'ec:2.4.1.141'
1327
+ get_glycans_by_enzyme('ec:2.4.1.141')
1328
+
1329
+ --- get_glycans_by_reaction(reaction_id)
1330
+
1331
+ Retrieve all glycans which have a link to the given reaction_id.
1332
+
1333
+ Return value:
1334
+ ArrayOfstring (glycan_id)
1335
+
1336
+ Example:
1337
+ # Returns the GLYCAN entry IDs which have a link to the REACTION entry,
1338
+ # 'rn:R06164'
1339
+ get_glycans_by_reaction('rn:R06164')
1340
+
1341
+ --- get_reactions_by_enzyme(enzyme_id)
1342
+
1343
+ Retrieve all reactions which have a link to the given enzyme_id.
1344
+
1345
+ Return value:
1346
+ ArrayOfstring (reaction_id)
1347
+
1348
+ Example:
1349
+ # Returns the REACTION entry IDs which have a link to the ENZYME entry,
1350
+ # 'ec:2.7.1.12'
1351
+ get_reactions_by_enzyme('ec:2.7.1.12')
1352
+
1353
+ --- get_reactions_by_compound(compound_id)
1354
+
1355
+ Retrieve all reactions which have a link to the given compound_id.
1356
+
1357
+ Return value:
1358
+ ArrayOfstring (reaction_id)
1359
+
1360
+ Example:
1361
+ # Returns the REACTION entry IDs which have a link to the COMPOUND entry,
1362
+ # 'cpd:C00199'
1363
+ get_reactions_by_compound('cpd:C00199')
1364
+
1365
+ --- get_reactions_by_glycan(glycan_id)
1366
+
1367
+ Retrieve all reactions which have a link to the given glycan_id.
1368
+
1369
+ Return value:
1370
+ ArrayOfstring (reaction_id)
1371
+
1372
+ Example:
1373
+ # Returns the REACTION entry IDs which have a link to the GLYCAN entry,
1374
+ # 'gl:G00001'
1375
+ get_reactions_by_glycan('gl:G00001')
1376
+
1377
+
1378
+ ==== GENES
1379
+
1380
+ This section describes the APIs for GENES database. For more details
1381
+ on GENES database, see:
1382
+
1383
+ * ((<URL:http://www.genome.jp/kegg/kegg2.html#genes>))
1384
+
1385
+ --- get_genes_by_organism(org, start, max_results)
1386
+
1387
+ Retrieve all genes of the specified organism.
1388
+
1389
+ Return value:
1390
+ ArrayOfstring (genes_id)
1391
+
1392
+ Example:
1393
+ # Retrieve H. influenzae genes a hundred at a time.
1394
+ get_genes_by_organism('hin', 1, 100)
1395
+ get_genes_by_organism('hin', 101, 100)
1396
+
1397
+
1398
+ ==== GENOME
1399
+
1400
+ This section describes the APIs for GENOME database. For more details
1401
+ on GENOME database, see:
1402
+
1403
+ * ((<URL:http://www.genome.jp/kegg/kegg2.html#genome>))
1404
+
1405
+ --- get_number_of_genes_by_organism(org)
1406
+
1407
+ Get the number of genes coded in the specified organism's genome.
1408
+
1409
+ Return value:
1410
+ int
1411
+
1412
+ Example:
1413
+ # Get the number of the genes on the E.coli genome.
1414
+ get_number_of_genes_by_organism('eco')
1415
+
1416
+
1417
+ ==== LIGAND
1418
+
1419
+ This section describes the APIs for LIGAND database.
1420
+
1421
+ --- convert_mol_to_kcf(mol_text)
1422
+
1423
+ Convert a MOL format entry into the KCF format.
1424
+
1425
+ Return value:
1426
+ string
1427
+
1428
+ Example:
1429
+ convert_mol_to_kcf(mol_str)
1430
+
1431
+
1432
+ == Notes
1433
+
1434
+ Last updated: May 31, 2005
1435
+
1436
+ =end
1437
+