bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,32 @@
1
+ #
2
+ # bio/db/embl/swissprot.rb - SwissProt database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: swissprot.rb,v 1.3 2004/08/23 23:40:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/embl/sptr'
24
+
25
+ module Bio
26
+
27
+ class SwissProt < SPTR
28
+ # Intentionally empty: the SwissProt format is handled entirely by the inherited SPTR class.
29
+ end
30
+
31
+ end
32
+
@@ -0,0 +1,31 @@
1
+ #
2
+ # bio/db/embl/trembl.rb - TrEMBL database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: trembl.rb,v 1.3 2004/08/23 23:40:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/embl/sptr'
24
+
25
+ module Bio
26
+
27
+ class TrEMBL < SPTR
28
+ # Intentionally empty: the TrEMBL format is handled entirely by the inherited SPTR class.
29
+ end
30
+
31
+ end
@@ -0,0 +1,32 @@
1
+ #
2
+ # bio/db/embl/uniprot.rb - UniProt database class
3
+ #
4
+ # Copyright (C) 2005 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: uniprot.rb,v 1.1 2005/09/10 23:43:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/embl/sptr'
24
+
25
+ module Bio
26
+
27
+ class UniProt < SPTR
28
+ # Intentionally empty: the UniProt format is handled entirely by the inherited SPTR class.
29
+ end
30
+
31
+ end
32
+
@@ -0,0 +1,604 @@
1
+ #
2
+ # bio/db/fantom.rb - RIKEN FANTOM2 database classes
3
+ #
4
+ # Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: fantom.rb,v 1.11 2005/09/26 13:00:06 k Exp $
21
+ #
22
+
23
+ begin
24
+ require 'rexml/document'
25
+ rescue LoadError
26
+ end
27
+ require 'uri'
28
+ require 'net/http'
29
+
30
+ require 'bio/db'
31
+ #require 'bio/sequence'
32
+
33
+ module Bio
34
+
35
+ module FANTOM
36
+
37
+ def query(idstr, http_proxy = nil) # Fetches the MaXML document for idstr and returns its first sequence entry.
38
+ xml = get_by_id(idstr, http_proxy) # response body returned by the remote CGI
39
+ seqs = MaXML::Sequences.new(xml.to_s) # parsed with the 'maxml-sequences' XPath
40
+ seqs[0] # Integer index => first MaXML::Sequence, or nil when there is none
41
+ end
42
+ module_function :query # makes it callable as Bio::FANTOM.query
43
+
44
+ def get_by_id(idstr, http_proxy = nil) # Downloads the MaXML record for idstr over HTTP and returns the response body.
44
+ addr = 'fantom.gsc.riken.go.jp'
45
+ port = 80
46
+ path = "/db/maxml/maxmlseq.cgi?masterid=#{URI.escape(idstr.to_s)}&style=xml" # idstr is URI-escaped before being placed in the query string
47
+ proxy = URI.parse(http_proxy.to_s) # nil becomes ''; NOTE(review): presumably yields nil host/port, i.e. no proxy -- confirm
48
+ xml = ''
49
+ Net::HTTP.start(addr, port, proxy.host, proxy.port) do |http|
50
+ response, = http.get(path) # keeps only the first value returned by http.get
51
+ xml = response.body
52
+ end
53
+ xml # still '' if the block never assigned a body
54
+ end
55
+ module_function :get_by_id # makes it callable as Bio::FANTOM.get_by_id
57
+
58
+
59
+ class MaXML < DB
60
+ # DTD of MaXML(Mouse annotation XML)
61
+ # http://fantom.gsc.riken.go.jp/maxml/maxml.dtd
62
+
63
+ DELIMITER = RS = "\n--EOF--\n"
64
+ # This class is for {allseq|repseq|allclust}.sep.xml,
65
+ # not for {allseq|repseq|allclust}.xml.
66
+
67
+ Data_XPath = ''
68
+
69
+ def initialize(x) # Stores in @elem either x itself (a REXML::Element) or the element selected from the document parsed from x.
70
+ if x.is_a?(REXML::Element) then
71
+ @elem = x # already parsed: use the element as-is
72
+ else
73
+ if x.is_a?(String) then
74
+ x = x.sub(/#{Regexp.escape(DELIMITER)}\z/om, "\n") # swap a trailing DELIMITER for a single newline before parsing
75
+ end
76
+ doc = REXML::Document.new(x)
77
+ @elem = doc.elements[self.class::Data_XPath] # each subclass selects its element through its own Data_XPath constant
78
+ #raise 'element is null' unless @elem
79
+ @elem = REXML::Document.new('') unless @elem # nothing matched: fall back to an empty document instead of raising
80
+ end
81
+ end
82
+ attr_reader :elem # the wrapped REXML node
83
+
84
+ def to_s # Returns the string form of the wrapped element (delegates to @elem.to_s).
85
+ @elem.to_s
86
+ end
87
+
88
+ def gsub_entities(str) # Replaces numeric character references (1-3 decimal digits) in str with the character for that code; a nil/false str is returned unchanged.
89
+ # workaround for bug?
90
+ if str then
91
+ str.gsub(/\&\#(\d{1,3})\;/) { sprintf("%c", $1.to_i) } # $1 is the decimal code captured by the pattern
92
+ else
93
+ str
94
+ end
95
+ end
96
+
97
+ def entry_id # Returns the 'id' attribute of the wrapped element, cached after the first call.
98
+ unless defined?(@entry_id)
99
+ @entry_id = @elem.attributes['id'] # defined? guard means a nil result is cached too
100
+ end
101
+ @entry_id
102
+ end
103
+ def self.define_element_text_method(array) # For each tag name in array, defines a cached reader returning the entity-decoded text of that child element.
104
+ array.each do |tagstr| # tagstr is interpolated as method name, instance variable name and element path
105
+ module_eval("
106
+ def #{tagstr}
107
+ unless defined?(@#{tagstr})
108
+ @#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
109
+ end
110
+ @#{tagstr}
111
+ end
112
+ ")
113
+ end
114
+ end
115
+ private_class_method :define_element_text_method # only usable from inside class bodies of MaXML and its subclasses
116
+
117
+ class Cluster < MaXML
118
+ # (MaXML cluster)
119
+ # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz
120
+
121
+ Data_XPath = 'maxml-clusters/cluster' # element selected by MaXML#initialize when parsing text
122
+
123
+ def representative_seqid # Cached, entity-decoded text of the 'representative-seqid' child element.
124
+ unless defined?(@representative_seqid)
125
+ @representative_seqid =
126
+ gsub_entities(@elem.text('representative-seqid'))
127
+ end
128
+ @representative_seqid
129
+ end
130
+
131
+ def sequences # Cached MaXML::Sequences wrapper built around this cluster's element.
132
+ unless defined?(@sequences)
133
+ @sequences = MaXML::Sequences.new(@elem)
134
+ end
135
+ @sequences
136
+ end
137
+
138
+ def sequence(idstr = nil) # Looks up a sequence by idstr; without an argument returns the representative sequence.
139
+ idstr ? sequences[idstr] : representative_sequence
140
+ end
141
+
142
+ def representative_sequence # Cached sequence found under representative_seqid, or nil when that id is absent.
143
+ unless defined?(@representative_sequence)
144
+ rid = representative_seqid
145
+ @representative_sequence =
146
+ rid ? sequences[representative_seqid] : nil
147
+ end
148
+ @representative_sequence
149
+ end
150
+ alias representative_clone representative_sequence
151
+
152
+ def representative_annotations # Annotations of the representative sequence, or nil when there is none.
153
+ e = representative_sequence
154
+ e ? e.annotations : nil
155
+ end
156
+
157
+ def representative_cloneid # cloneid of the representative sequence, or nil when there is none.
158
+ e = representative_sequence
159
+ e ? e.cloneid : nil
160
+ end
161
+
162
+ define_element_text_method(%w(fantomid)) # generates the cached #fantomid reader
163
+ end #class MaXML::Cluster
164
+
165
+ class Sequences < MaXML
166
+ Data_XPath = 'maxml-sequences' # element selected by MaXML#initialize when parsing text
167
+
168
+ include Enumerable
169
+ def each # Yields each MaXML::Sequence in turn; this is what Enumerable builds on.
170
+ to_a.each { |x| yield x }
171
+ end
172
+
173
+ def to_a # Cached Array of MaXML::Sequence objects, one per 'sequence' element.
174
+ unless defined?(@sequences)
175
+ @sequences = @elem.get_elements('sequence')
176
+ @sequences.collect! { |e| MaXML::Sequence.new(e) } # wrap the raw elements in place
177
+ end
178
+ @sequences
179
+ end
180
+
181
+ def get(idstr) # Returns the first sequence having idstr among its altid values (nil if none); results are memoized per idstr.
182
+ unless defined?(@hash)
183
+ @hash = {}
184
+ end
185
+ unless @hash.member?(idstr) then # member? rather than a truthiness test, so nil results are cached as well
186
+ @hash[idstr] = self.find do |x|
187
+ x.altid.values.index(idstr) # non-nil index (including 0) counts as a match
188
+ end
189
+ end
190
+ @hash[idstr]
191
+ end
192
+
193
+ def [](*arg) # A single String argument is an id lookup via get; anything else is forwarded to Array#[].
194
+ if arg[0].is_a?(String) and arg.size == 1 then
195
+ get(arg[0])
196
+ else
197
+ to_a[*arg]
198
+ end
199
+ end
200
+
201
+ def cloneids # Cached Array holding the cloneid of every sequence, in document order.
202
+ unless defined?(@cloneids)
203
+ @cloneids = to_a.collect { |x| x.cloneid }
204
+ end
205
+ @cloneids
206
+ end
207
+
208
+ def id_strings # Cached, sorted, duplicate-free Array of all id strings of all sequences.
209
+ unless defined?(@id_strings)
210
+ @id_strings = to_a.collect { |x| x.id_strings }
211
+ @id_strings.flatten!
212
+ @id_strings.sort!
213
+ @id_strings.uniq!
214
+ end
215
+ @id_strings
216
+ end
217
+ end #class MaXML::Sequences
218
+
219
+ class Sequence < MaXML
220
+ # (MaXML sequence)
221
+ # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz
222
+ # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz
223
+
224
+ Data_XPath = 'maxml-sequences/sequence' # element selected by MaXML#initialize when parsing text
225
+
226
+ def altid(t = nil) # Without t returns the Hash of 'type' attribute => id text for all 'altid' elements; with t returns that single entry.
227
+ unless defined?(@altid)
228
+ @altid = {}
229
+ @elem.each_element('altid') do |e|
230
+ @altid[e.attributes['type']] = gsub_entities(e.text) # a later altid with the same type overwrites an earlier one
231
+ end
232
+ end
233
+ if t then
234
+ @altid[t]
235
+ else
236
+ @altid
237
+ end
238
+ end
239
+
240
+ def id_strings # Sorted, duplicate-free Array of the altid values (recomputed on every call).
241
+ altid.values.sort.uniq
242
+ end
243
+
244
+ def library_id # First two characters of entry_id.
245
+ entry_id[0,2]
246
+ end
247
+
248
+ def annotations # Cached MaXML::Annotations wrapper around the 'annotations' child element.
249
+ unless defined?(@annotations)
250
+ @annotations =
251
+ MaXML::Annotations.new(@elem.elements['annotations'])
252
+ end
253
+ @annotations
254
+ end
255
+
256
+ define_element_text_method(%w(annotator version modified_time comment)) # generates cached child-text readers
257
+
258
+ def self.define_id_method(array) # Like define_element_text_method, but the generated reader falls back to altid(name) when the child element has no text.
259
+ array.each do |tagstr| # tagstr is interpolated as method name, instance variable name, element path and altid key
260
+ module_eval("
261
+ def #{tagstr}
262
+ unless defined?(@#{tagstr})
263
+ @#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
264
+ @#{tagstr} = altid('#{tagstr}') unless @#{tagstr}
265
+ end
266
+ @#{tagstr}
267
+ end
268
+ ")
269
+ end
270
+ end
271
+ private_class_method :define_id_method
272
+
273
+ define_id_method(%w(seqid fantomid cloneid rearrayid accession)) # generates the id readers with altid fallback
274
+ end #class MaXML::Sequence
275
+
276
+ class Annotations < MaXML
277
+ Data_XPath = nil # NOTE(review): Sequence#annotations passes in an element lookup result, so no XPath seems to be needed -- confirm
278
+
279
+ include Enumerable
280
+ def each # Yields each MaXML::Annotation in turn; this is what Enumerable builds on.
281
+ to_a.each { |x| yield x }
282
+ end
283
+
284
+ def to_a # Cached Array of MaXML::Annotation objects, one per 'annotation' element.
285
+ unless defined?(@a)
286
+ @a = @elem.get_elements('annotation')
287
+ @a.collect! { |e| MaXML::Annotation.new(e) } # wrap the raw elements in place
288
+ end
289
+ @a
290
+ end
291
+
292
+ def get_all_by_qualifier(qstr) # Returns every annotation whose qualifier equals qstr (possibly an empty Array); memoized per qstr.
293
+ unless defined?(@hash)
294
+ @hash = {}
295
+ end
296
+ unless @hash.member?(qstr) then
297
+ @hash[qstr] = self.find_all do |x|
298
+ x.qualifier == qstr
299
+ end
300
+ end
301
+ @hash[qstr]
302
+ end
303
+
304
+ def get_by_qualifier(qstr) # First annotation whose qualifier equals qstr, or nil when none matches.
305
+ a = get_all_by_qualifier(qstr)
306
+ a ? a[0] : nil
307
+ end
308
+
309
+ def [](*arg) # A single String argument is a qualifier lookup; anything else is forwarded to Array#[].
310
+ if arg[0].is_a?(String) and arg.size == 1 then
311
+ get_by_qualifier(arg[0])
312
+ else
313
+ to_a[*arg]
314
+ end
315
+ end
316
+
317
+ def cds_start # Cached anntext of the 'cds_start' annotation converted with to_i, or nil when absent.
318
+ unless defined?(@cds_start)
319
+ e = get_by_qualifier('cds_start')
320
+ @cds_start = e ? e.anntext.to_i : nil
321
+ end
322
+ @cds_start
323
+ end
324
+
325
+ def cds_stop # Cached anntext of the 'cds_stop' annotation converted with to_i, or nil when absent.
326
+ unless defined?(@cds_stop)
327
+ e = get_by_qualifier('cds_stop')
328
+ @cds_stop = e ? e.anntext.to_i : nil
329
+ end
330
+ @cds_stop
331
+ end
332
+
333
+ def gene_name # Cached anntext of the 'gene_name' annotation, or nil when absent.
334
+ unless defined?(@gene_name)
335
+ e = get_by_qualifier('gene_name')
336
+ @gene_name = e ? e.anntext : nil
337
+ end
338
+ @gene_name
339
+ end
340
+
341
+ def data_source # Cached first datasrc entry of the 'gene_name' annotation, or nil when that annotation is absent.
342
+ unless defined?(@data_source)
343
+ e = get_by_qualifier('gene_name') # NOTE(review): reads the gene_name annotation, not a dedicated qualifier -- confirm intended
344
+ @data_source = e ? e.datasrc[0] : nil
345
+ end
346
+ @data_source
347
+ end
348
+
349
+ def evidence # Cached evidence text of the 'gene_name' annotation, or nil when that annotation is absent.
350
+ unless defined?(@evidence)
351
+ e = get_by_qualifier('gene_name') # NOTE(review): reads the gene_name annotation, not a dedicated qualifier -- confirm intended
352
+ @evidence = e ? e.evidence : nil
353
+ end
354
+ @evidence
355
+ end
356
+ end #class MaXML::Annotations
357
+
358
+ class Annotation < MaXML
359
+ def entry_id # Always nil: overrides MaXML#entry_id, which reads the 'id' attribute.
360
+ nil
361
+ end
362
+
363
+ class DataSrc < String # A String (the datasrc text) that additionally carries an href value.
364
+ def initialize(text, href)
365
+ super(text) # the String content is the text itself
366
+ @href = href
367
+ end
368
+ attr_reader :href
369
+ end
370
+
371
+ def datasrc # Cached Array of DataSrc objects, one per 'datasrc' child element.
372
+ unless defined?(@datasrc)
373
+ @datasrc = []
374
+ @elem.each_element('datasrc') do |e|
375
+ text = e.text
376
+ href = e.attributes['href']
377
+ @datasrc << DataSrc.new(gsub_entities(text), gsub_entities(href)) # both text and href are entity-decoded
378
+ end
379
+ end
380
+ @datasrc
381
+ end
382
+
383
+ define_element_text_method(%w(qualifier srckey anntext evidence)) # generates cached child-text readers
384
+ end #class MaXML::Annotation
385
+
386
+ end #class MaXML
387
+
388
+ end #module FANTOM
389
+
390
+ end #module Bio
391
+
392
+ =begin
393
+
394
+ Bio::FANTOM are database classes (and modules) treating RIKEN FANTOM2 data.
395
+ FANTOM2 is available at ((<URL:http://fantom2.gsc.riken.go.jp/>)).
396
+
397
+ = Bio::FANTOM
398
+
399
+ This module contains useful methods to access databases.
400
+
401
+ --- Bio::FANTOM.query(idstr, http_proxy=nil)
402
+
403
+ Get MaXML sequence data corresponding to given ID through the internet
404
+ from ((<URL:http://fantom.gsc.riken.go.jp/db/maxml/>)).
405
+ Returns Bio::FANTOM::MaXML::Sequence object.
406
+
407
+ --- Bio::FANTOM.get_by_id(idstr, http_proxy=nil)
408
+
409
+ Same as FANTOM.query, but returns XML document as a string.
410
+ (Reference: bio/io/registry.rb)
411
+
412
+
413
+ = Bio::FANTOM::MaXML::Cluster
414
+
415
+ This class is for 'allclust.sep.xml' found at
416
+ ((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz>)).
417
+ Note that this class is not suitable for 'allclust.xml'.
418
+
419
+ --- Bio::FANTOM::MaXML::Cluster.new(str)
420
+
421
+ --- Bio::FANTOM::MaXML::Cluster#entry_id
422
+
423
+ --- Bio::FANTOM::MaXML::Cluster#fantomid
424
+
425
+ --- Bio::FANTOM::MaXML::Cluster#representative_seqid
426
+
427
+ --- Bio::FANTOM::MaXML::Cluster#sequences
428
+
429
+ Lists sequences in this cluster.
430
+ Returns Bio::FANTOM::MaXML::Sequences object.
431
+
432
+ --- Bio::FANTOM::MaXML::Cluster#sequence(id_str)
433
+
434
+ Shows the sequence information for the given id (the representative sequence when id_str is omitted).
435
+ Returns Bio::FANTOM::MaXML::Sequence object or nil.
436
+
437
+ --- Bio::FANTOM::MaXML::Cluster#representative_sequence
438
+ --- Bio::FANTOM::MaXML::Cluster#representative_clone
439
+
440
+ Shows the sequence identified by representative_seqid.
441
+ Returns Bio::FANTOM::MaXML::Sequence object (or nil).
442
+
443
+ --- Bio::FANTOM::MaXML::Cluster#representative_annotations
444
+
445
+ Shows annotations of the representative sequence.
446
+ Returns Bio::FANTOM::MaXML::Annotations object (or nil).
447
+
448
+ --- Bio::FANTOM::MaXML::Cluster#representative_cloneid
449
+
450
+ Shows the cloneid of the representative sequence.
451
+ Returns String (or nil).
452
+
453
+
454
+ = Bio::FANTOM::MaXML::Sequences
455
+
456
+ The instances of this class are automatically created
457
+ by Bio::FANTOM::MaXML::Cluster class.
458
+
459
+ This class can also be used for 'allseq.sep.xml' and 'repseq.sep.xml',
460
+ but it is better to use the Bio::FANTOM::MaXML::Sequence class for them.
461
+
462
+ In addition, this class can be used for 'allseq.xml' and 'repseq.xml',
463
+ but this is not recommended because it is very slow.
464
+
465
+ --- Bio::FANTOM::MaXML::Sequences#to_a
466
+
467
+ Returns an Array of Bio::FANTOM::MaXML::Sequence objects.
468
+
469
+ --- Bio::FANTOM::MaXML::Sequences#each
470
+
471
+ --- Bio::FANTOM::MaXML::Sequences#[](x)
472
+
473
+ Same as to_a[x] when x is an integer.
474
+ Same as get(x) when x is a string.
475
+
476
+ --- Bio::FANTOM::MaXML::Sequences#get(id_str)
477
+
478
+ Shows the sequence information for the given id.
479
+ Returns Bio::FANTOM::MaXML::Sequence object or nil.
480
+
481
+ --- Bio::FANTOM::MaXML::Sequences#cloneids
482
+
483
+ Shows clone ID list.
484
+ Returns an array of strings.
485
+
486
+ --- Bio::FANTOM::MaXML::Sequences#id_strings
487
+
488
+ Shows ID list.
489
+ Returns an array of strings.
490
+
491
+
492
+ = Bio::FANTOM::MaXML::Sequence
493
+
494
+ This class is for 'allseq.sep.xml' and 'repseq.sep.xml' found at
495
+ ((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz>)) and
496
+ ((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz>)).
497
+ Note that this class is not suitable for 'allseq.xml' and 'repseq.xml'.
498
+
499
+ In addition, the instances of this class are automatically created
500
+ by Bio::FANTOM::MaXML::Sequences class.
501
+
502
+ --- Bio::FANTOM::MaXML::Sequence.new(str)
503
+
504
+ --- Bio::FANTOM::MaXML::Sequence#entry_id
505
+
506
+ --- Bio::FANTOM::MaXML::Sequence#altid(type_str = nil)
507
+
508
+ Returns hash of altid if no arguments are given.
509
+ Returns ID as a string if a type of ID (string) is given.
510
+
511
+ --- Bio::FANTOM::MaXML::Sequence#annotations
512
+
513
+ Gets lists of annotation data.
514
+ Returns a Bio::FANTOM::MaXML::Annotations object.
515
+
516
+ --- Bio::FANTOM::MaXML::Sequence#id_strings
517
+
518
+ Gets lists of ID. (same as altid.values)
519
+ Returns an array of strings.
520
+
521
+ --- Bio::FANTOM::MaXML::Sequence#library_id
522
+
523
+ Shows library ID. (same as cloneid[0,2])
524
+ Library IDs are listed at:
525
+ ((<URL:http://fantom2.gsc.riken.go.jp/fantom2/SI/sup01_est_3r_libraryinfo.pdf>))
526
+ ((<URL:http://fantom2.gsc.riken.go.jp/fantom2/SI/sup01_est_5f_libraryinfo.pdf>))
527
+
528
+ --- Bio::FANTOM::MaXML::Sequence#seqid
529
+
530
+ --- Bio::FANTOM::MaXML::Sequence#fantomid
531
+
532
+ --- Bio::FANTOM::MaXML::Sequence#cloneid
533
+
534
+ --- Bio::FANTOM::MaXML::Sequence#rearrayid
535
+
536
+ --- Bio::FANTOM::MaXML::Sequence#accession
537
+
538
+ --- Bio::FANTOM::MaXML::Sequence#annotator
539
+
540
+ --- Bio::FANTOM::MaXML::Sequence#version
541
+
542
+ --- Bio::FANTOM::MaXML::Sequence#modified_time
543
+
544
+ --- Bio::FANTOM::MaXML::Sequence#comment
545
+
546
+
547
+ = Bio::FANTOM::MaXML::Annotations
548
+
549
+ The instances of this class are automatically created
550
+ by Bio::FANTOM::MaXML::Sequence class.
551
+
552
+ --- Bio::FANTOM::MaXML::Annotations#to_a
553
+
554
+ Returns an Array of Bio::FANTOM::MaXML::Annotation objects.
555
+
556
+ --- Bio::FANTOM::MaXML::Annotations#each
557
+
558
+ --- Bio::FANTOM::MaXML::Annotations#get_all_by_qualifier(qstr)
559
+
560
+ --- Bio::FANTOM::MaXML::Annotations#get_by_qualifier(qstr)
561
+
562
+ --- Bio::FANTOM::MaXML::Annotations#[](x)
563
+
564
+ Same as to_a[x] when x is an integer.
565
+ Same as get_by_qualifier(x) when x is a string.
566
+
567
+ --- Bio::FANTOM::MaXML::Annotations#cds_start
568
+ --- Bio::FANTOM::MaXML::Annotations#cds_stop
569
+ --- Bio::FANTOM::MaXML::Annotations#gene_name
570
+ --- Bio::FANTOM::MaXML::Annotations#data_source
571
+ --- Bio::FANTOM::MaXML::Annotations#evidence
572
+
573
+
574
+ = Bio::FANTOM::MaXML::Annotation
575
+
576
+ The instances of this class are automatically created
577
+ by Bio::FANTOM::MaXML::Annotations class.
578
+
579
+ --- Bio::FANTOM::MaXML::Annotation#datasrc
580
+
581
+ Returns an Array of Bio::FANTOM::MaXML::Annotation::DataSrc objects.
582
+
583
+ --- Bio::FANTOM::MaXML::Annotation#qualifier
584
+
585
+ --- Bio::FANTOM::MaXML::Annotation#srckey
586
+
587
+ --- Bio::FANTOM::MaXML::Annotation#anntext
588
+
589
+ --- Bio::FANTOM::MaXML::Annotation#evidence
590
+
591
+ = Bio::FANTOM::MaXML::Annotation::DataSrc < String
592
+
593
+ The instances of this class are automatically created
594
+ by Bio::FANTOM::MaXML::Annotation class.
595
+
596
+ --- Bio::FANTOM::MaXML::Annotation::DataSrc#href
597
+
598
+ Shows a link URL to the database web page as a String.
599
+
600
+ = References
601
+
602
+ * ((<URL:http://fantom2.gsc.riken.go.jp/>))
603
+
604
+ =end