bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,32 @@
1
+ #
2
+ # bio/db/embl/swissprot.rb - SwissProt database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: swissprot.rb,v 1.3 2004/08/23 23:40:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/embl/sptr'
24
+
25
module Bio

  # SwissProt database entry class.
  #
  # The body is intentionally empty: the class only subclasses SPTR, so all
  # of its behavior is inherited from there.
  class SwissProt < SPTR; end

end
32
+
@@ -0,0 +1,31 @@
1
+ #
2
+ # bio/db/embl/trembl.rb - TrEMBL database class
3
+ #
4
+ # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: trembl.rb,v 1.3 2004/08/23 23:40:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/embl/sptr'
24
+
25
module Bio

  # TrEMBL database entry class.
  #
  # The body is intentionally empty: the class only subclasses SPTR, so all
  # of its behavior is inherited from there.
  class TrEMBL < SPTR; end

end
@@ -0,0 +1,32 @@
1
+ #
2
+ # bio/db/embl/uniprot.rb - UniProt database class
3
+ #
4
+ # Copyright (C) 2005 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: uniprot.rb,v 1.1 2005/09/10 23:43:35 k Exp $
21
+ #
22
+
23
+ require 'bio/db/embl/sptr'
24
+
25
module Bio

  # UniProt database entry class.
  #
  # The body is intentionally empty: the class only subclasses SPTR, so all
  # of its behavior is inherited from there.
  class UniProt < SPTR; end

end
32
+
@@ -0,0 +1,604 @@
1
+ #
2
+ # bio/db/fantom.rb - RIKEN FANTOM2 database classes
3
+ #
4
+ # Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: fantom.rb,v 1.11 2005/09/26 13:00:06 k Exp $
21
+ #
22
+
23
+ begin
24
+ require 'rexml/document'
25
+ rescue LoadError
26
+ end
27
+ require 'uri'
28
+ require 'net/http'
29
+
30
+ require 'bio/db'
31
+ #require 'bio/sequence'
32
+
33
+ module Bio
34
+
35
+ module FANTOM
36
+
37
# Fetches the MaXML document for +idstr+ via get_by_id and returns the
# first item of the MaXML::Sequences collection built from it.
#
# idstr::      ID handed unchanged to get_by_id.
# http_proxy:: optional proxy URL handed unchanged to get_by_id.
def query(idstr, http_proxy = nil)
  document = get_by_id(idstr, http_proxy)
  MaXML::Sequences.new(document.to_s)[0]
end
module_function :query
43
+
44
# Downloads the MaXML XML for +idstr+ from the FANTOM web server and
# returns the HTTP response body.
#
# idstr::      master ID; converted with to_s and URI-escaped into the
#              query string.
# http_proxy:: optional proxy URL; it is converted with to_s (nil becomes
#              an empty string) and parsed, and the parsed host and port
#              are passed to Net::HTTP.start.
def get_by_id(idstr, http_proxy = nil)
  request_path =
    "/db/maxml/maxmlseq.cgi?masterid=#{URI.escape(idstr.to_s)}&style=xml"
  proxy_uri = URI.parse(http_proxy.to_s)
  body = ''
  Net::HTTP.start('fantom.gsc.riken.go.jp', 80,
                  proxy_uri.host, proxy_uri.port) do |connection|
    # Destructuring assignment keeps only the first value of the result.
    response, = connection.get(request_path)
    body = response.body
  end
  body
end
module_function :get_by_id
57
+
58
+
59
+ class MaXML < DB
60
+ # DTD of MaXML(Mouse annotation XML)
61
+ # http://fantom.gsc.riken.go.jp/maxml/maxml.dtd
62
+
63
+ DELIMITER = RS = "\n--EOF--\n"
64
+ # This class is for {allseq|repseq|allclust}.sep.xml,
65
+ # not for {allseq|repseq|allclust}.xml.
66
+
67
+ Data_XPath = ''
68
+
69
# Wraps an XML element.
#
# x:: either a REXML::Element, which is stored as-is, or a source for
#     REXML::Document.new. When it is a String, a trailing DELIMITER is
#     first replaced by a single newline. The document is then searched
#     with the receiving class's Data_XPath; if nothing is found, an empty
#     REXML::Document is stored instead.
def initialize(x)
  if x.is_a?(REXML::Element)
    @elem = x
    return
  end

  source = x
  if source.is_a?(String)
    source = source.sub(/#{Regexp.escape(DELIMITER)}\z/om, "\n")
  end
  document = REXML::Document.new(source)
  @elem = document.elements[self.class::Data_XPath]
  #raise 'element is null' unless @elem
  @elem = REXML::Document.new('') unless @elem
end
82
+ attr_reader :elem
83
+
84
# String form of the wrapped element; simply delegates to @elem.to_s.
def to_s
  @elem.to_s
end
87
+
88
# Replaces decimal character references of one to three digits
# (e.g. "&#65;") in +str+ with the character for that code.
#
# str:: a String, or nil/false, which is returned unchanged.
# Returns a new String with the references substituted.
def gsub_entities(str)
  return str unless str

  # workaround for bug?
  str.gsub(/\&\#(\d{1,3})\;/) { sprintf("%c", $1.to_i) }
end
96
+
97
# Value of the wrapped element's 'id' attribute.
# The lookup runs once; the result (even nil) is cached in @entry_id.
def entry_id
  @entry_id = @elem.attributes['id'] unless defined?(@entry_id)
  @entry_id
end
103
# Class-level generator for simple reader methods.
#
# For every tag name in +array+ it defines an instance method of that
# name. The generated method returns the text of the same-named child of
# @elem, passed through gsub_entities, and caches it in an instance
# variable of the same name.
#
# array:: tag names (Strings); each is interpolated into evaluated source,
#         so it must be usable as a method and instance-variable name.
def self.define_element_text_method(array)
  array.each do |tag|
    reader_source = "
      def #{tag}
        unless defined?(@#{tag})
          @#{tag} = gsub_entities(@elem.text('#{tag}'))
        end
        @#{tag}
      end
    "
    module_eval(reader_source)
  end
end
private_class_method :define_element_text_method
116
+
117
# One cluster of a MaXML cluster document.
# ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz
class Cluster < MaXML

  # XPath evaluated by the inherited constructor to find the cluster element.
  Data_XPath = 'maxml-clusters/cluster'

  # Text of the 'representative-seqid' child, entity-decoded and cached.
  def representative_seqid
    unless defined?(@representative_seqid)
      raw_text = @elem.text('representative-seqid')
      @representative_seqid = gsub_entities(raw_text)
    end
    @representative_seqid
  end

  # MaXML::Sequences collection built from this cluster's element (cached).
  def sequences
    @sequences = MaXML::Sequences.new(@elem) unless defined?(@sequences)
    @sequences
  end

  # With an ID string, looks that ID up in #sequences; without one,
  # returns #representative_sequence.
  def sequence(idstr = nil)
    return representative_sequence unless idstr

    sequences[idstr]
  end

  # The entry of #sequences whose ID is #representative_seqid, or nil when
  # there is no representative seqid. The result is cached.
  def representative_sequence
    unless defined?(@representative_sequence)
      @representative_sequence =
        if representative_seqid
          sequences[representative_seqid]
        else
          nil
        end
    end
    @representative_sequence
  end
  alias representative_clone representative_sequence

  # Annotations of the representative sequence, or nil if there is none.
  def representative_annotations
    representative = representative_sequence
    representative.annotations if representative
  end

  # cloneid of the representative sequence, or nil if there is none.
  def representative_cloneid
    representative = representative_sequence
    representative.cloneid if representative
  end

  define_element_text_method(%w(fantomid))
end #class MaXML::Cluster
164
+
165
# Enumerable collection of the 'sequence' children of the wrapped element,
# each exposed as a MaXML::Sequence.
class Sequences < MaXML
  Data_XPath = 'maxml-sequences'

  include Enumerable

  # Yields every MaXML::Sequence in document order.
  def each
    to_a.each do |seq|
      yield seq
    end
  end

  # Array of MaXML::Sequence objects, built once and cached.
  def to_a
    unless defined?(@sequences)
      @sequences = @elem.get_elements('sequence')
      @sequences.map! { |node| MaXML::Sequence.new(node) }
    end
    @sequences
  end

  # First sequence having +idstr+ among its altid values, or nil.
  # Lookups (including misses) are memoized per ID.
  def get(idstr)
    @hash = {} unless defined?(@hash)
    unless @hash.key?(idstr)
      @hash[idstr] = find { |seq| seq.altid.values.index(idstr) }
    end
    @hash[idstr]
  end

  # A single String argument is treated as an ID for #get; any other
  # argument list is forwarded to Array#[] on #to_a.
  def [](*arg)
    return get(arg[0]) if arg.size == 1 && arg[0].is_a?(String)

    to_a[*arg]
  end

  # cloneid of every sequence, in order (cached).
  def cloneids
    @cloneids = to_a.collect { |seq| seq.cloneid } unless defined?(@cloneids)
    @cloneids
  end

  # Sorted, de-duplicated list of the id_strings of all sequences (cached).
  def id_strings
    unless defined?(@id_strings)
      @id_strings = to_a.collect { |seq| seq.id_strings }
      # In-place steps are kept separate so the cached array exists as soon
      # as it has been collected.
      @id_strings.flatten!
      @id_strings.sort!
      @id_strings.uniq!
    end
    @id_strings
  end
end #class MaXML::Sequences
218
+
219
# One sequence entry of a MaXML sequence document.
# ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz
# ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz
class Sequence < MaXML

  Data_XPath = 'maxml-sequences/sequence'

  # Alternative IDs taken from the 'altid' children: a Hash mapping each
  # child's 'type' attribute to its entity-decoded text (built once).
  #
  # t:: when given, only the value stored under that type is returned;
  #     otherwise the whole Hash is returned.
  def altid(t = nil)
    unless defined?(@altid)
      @altid = {}
      @elem.each_element('altid') do |node|
        @altid[node.attributes['type']] = gsub_entities(node.text)
      end
    end
    return @altid unless t

    @altid[t]
  end

  # Sorted, de-duplicated altid values.
  def id_strings
    altid.values.sort.uniq
  end

  # First two characters of entry_id.
  def library_id
    entry_id[0, 2]
  end

  # MaXML::Annotations built from the 'annotations' child element (cached).
  def annotations
    unless defined?(@annotations)
      annotations_node = @elem.elements['annotations']
      @annotations = MaXML::Annotations.new(annotations_node)
    end
    @annotations
  end

  define_element_text_method(%w(annotator version modified_time comment))

  # Class-level generator for ID readers.
  #
  # Each generated method returns the entity-decoded text of the same-named
  # child element and, when that is nil/false, falls back to the altid
  # entry of the same type. The result is cached.
  #
  # array:: tag names (Strings) interpolated into the evaluated source.
  def self.define_id_method(array)
    array.each do |tag|
      reader_source = "
        def #{tag}
          unless defined?(@#{tag})
            @#{tag} = gsub_entities(@elem.text('#{tag}'))
            @#{tag} = altid('#{tag}') unless @#{tag}
          end
          @#{tag}
        end
      "
      module_eval(reader_source)
    end
  end
  private_class_method :define_id_method

  define_id_method(%w(seqid fantomid cloneid rearrayid accession))
end #class MaXML::Sequence
275
+
276
# Enumerable collection of the 'annotation' children of the wrapped
# element, each exposed as a MaXML::Annotation.
class Annotations < MaXML
  Data_XPath = nil

  include Enumerable

  # Yields every MaXML::Annotation in document order.
  def each
    to_a.each do |annotation|
      yield annotation
    end
  end

  # Array of MaXML::Annotation objects, built once and cached.
  def to_a
    unless defined?(@a)
      @a = @elem.get_elements('annotation')
      @a.map! { |node| MaXML::Annotation.new(node) }
    end
    @a
  end

  # All annotations whose qualifier equals +qstr+ (memoized per qualifier).
  def get_all_by_qualifier(qstr)
    @hash = {} unless defined?(@hash)
    unless @hash.key?(qstr)
      @hash[qstr] = find_all { |annotation| annotation.qualifier == qstr }
    end
    @hash[qstr]
  end

  # First annotation whose qualifier equals +qstr+, or nil.
  def get_by_qualifier(qstr)
    matches = get_all_by_qualifier(qstr)
    matches ? matches[0] : nil
  end

  # A single String argument is treated as a qualifier for
  # #get_by_qualifier; any other argument list is forwarded to Array#[]
  # on #to_a.
  def [](*arg)
    return get_by_qualifier(arg[0]) if arg.size == 1 && arg[0].is_a?(String)

    to_a[*arg]
  end

  # anntext of the 'cds_start' annotation as an Integer, or nil (cached).
  def cds_start
    unless defined?(@cds_start)
      found = get_by_qualifier('cds_start')
      @cds_start = if found then found.anntext.to_i else nil end
    end
    @cds_start
  end

  # anntext of the 'cds_stop' annotation as an Integer, or nil (cached).
  def cds_stop
    unless defined?(@cds_stop)
      found = get_by_qualifier('cds_stop')
      @cds_stop = if found then found.anntext.to_i else nil end
    end
    @cds_stop
  end

  # anntext of the 'gene_name' annotation, or nil (cached).
  def gene_name
    unless defined?(@gene_name)
      found = get_by_qualifier('gene_name')
      @gene_name = if found then found.anntext else nil end
    end
    @gene_name
  end

  # First datasrc of the 'gene_name' annotation, or nil (cached).
  def data_source
    unless defined?(@data_source)
      found = get_by_qualifier('gene_name')
      @data_source = if found then found.datasrc[0] else nil end
    end
    @data_source
  end

  # evidence of the 'gene_name' annotation, or nil (cached).
  def evidence
    unless defined?(@evidence)
      found = get_by_qualifier('gene_name')
      @evidence = if found then found.evidence else nil end
    end
    @evidence
  end
end #class MaXML::Annotations
357
+
358
# A single annotation element.
class Annotation < MaXML

  # Always nil for this class.
  def entry_id
    nil
  end

  # A String that additionally carries a link target in #href.
  class DataSrc < String
    attr_reader :href

    # text:: the string content.
    # href:: value exposed through #href.
    def initialize(text, href)
      super(text)
      @href = href
    end
  end

  # Array of DataSrc objects, one per 'datasrc' child element. Both the
  # element text and its 'href' attribute are entity-decoded. Built once
  # and cached.
  def datasrc
    unless defined?(@datasrc)
      @datasrc = []
      @elem.each_element('datasrc') do |node|
        label = gsub_entities(node.text)
        link = gsub_entities(node.attributes['href'])
        @datasrc << DataSrc.new(label, link)
      end
    end
    @datasrc
  end

  define_element_text_method(%w(qualifier srckey anntext evidence))
end #class MaXML::Annotation
385
+
386
+ end #class MaXML
387
+
388
+ end #module FANTOM
389
+
390
+ end #module Bio
391
+
392
+ =begin
393
+
394
+ Bio::FANTOM are database classes (and modules) treating RIKEN FANTOM2 data.
395
+ FANTOM2 is available at ((<URL:http://fantom2.gsc.riken.go.jp/>)).
396
+
397
+ = Bio::FANTOM
398
+
399
+ This module contains useful methods to access databases.
400
+
401
+ --- Bio::FANTOM.query(idstr, http_proxy=nil)
402
+
403
+ Get MaXML sequence data corresponding to given ID through the internet
404
+ from ((<URL:http://fantom.gsc.riken.go.jp/db/maxml/>)).
405
+ Returns Bio::FANTOM::MaXML::Sequence object.
406
+
407
+ --- Bio::FANTOM.get_by_id(idstr, http_proxy=nil)
408
+
409
+ Same as FANTOM.query, but returns XML document as a string.
410
+ (Reference: bio/io/registry.rb)
411
+
412
+
413
+ = Bio::FANTOM::MaXML::Cluster
414
+
415
+ This class is for 'allclust.sep.xml' found at
416
+ ((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz>)).
417
+ Note that this class is not suitable for 'allclust.xml'.
418
+
419
+ --- Bio::FANTOM::MaXML::Cluster.new(str)
420
+
421
+ --- Bio::FANTOM::MaXML::Cluster#entry_id
422
+
423
+ --- Bio::FANTOM::MaXML::Cluster#fantomid
424
+
425
+ --- Bio::FANTOM::MaXML::Cluster#representative_seqid
426
+
427
+ --- Bio::FANTOM::MaXML::Cluster#sequences
428
+
429
+ Lists sequences in this cluster.
430
+ Returns Bio::FANTOM::MaXML::Sequences object.
431
+
432
+ --- Bio::FANTOM::MaXML::Cluster#sequence(id_str)
433
+
434
+ Shows a sequence information of given id.
435
+ Returns Bio::FANTOM::MaXML::Sequence object or nil.
436
+
437
+ --- Bio::FANTOM::MaXML::Cluster#representative_sequence
438
+ --- Bio::FANTOM::MaXML::Cluster#representative_clone
439
+
440
+ Shows the sequence identified by representative_seqid.
441
+ Returns Bio::FANTOM::MaXML::Sequence object (or nil).
442
+
443
+ --- Bio::FANTOM::MaXML::Cluster#representative_annotations
444
+
445
+ Shows annotations of the representative sequence.
446
+ Returns Bio::FANTOM::MaXML::Annotations object (or nil).
447
+
448
+ --- Bio::FANTOM::MaXML::Cluster#representative_cloneid
449
+
450
+ Shows cloneid of the representative sequence.
451
+ Returns String (or nil).
452
+
453
+
454
+ = Bio::FANTOM::MaXML::Sequences
455
+
456
+ The instances of this class are automatically created
457
+ by Bio::FANTOM::MaXML::Cluster class.
458
+
459
+ This class can also be used for 'allseq.sep.xml' and 'repseq.sep.xml',
460
+ but you had better use the Bio::FANTOM::MaXML::Sequence class.
461
+
462
+ In addition, this class can be used for 'allseq.xml' and 'repseq.xml',
463
+ but you had better not use them, because doing so is very slow.
464
+
465
+ --- Bio::FANTOM::MaXML::Sequences#to_a
466
+
467
+ Returns an Array of Bio::FANTOM::MaXML::Sequence objects.
468
+
469
+ --- Bio::FANTOM::MaXML::Sequences#each
470
+
471
+ --- Bio::FANTOM::MaXML::Sequences#[](x)
472
+
473
+ Same as to_a[x] when x is an integer.
474
+ Same as get(x) when x is a string.
475
+
476
+ --- Bio::FANTOM::MaXML::Sequences#get(id_str)
477
+
478
+ Shows a sequence information of given id.
479
+ Returns Bio::FANTOM::MaXML::Sequence object or nil.
480
+
481
+ --- Bio::FANTOM::MaXML::Sequences#cloneids
482
+
483
+ Shows clone ID list.
484
+ Returns an array of strings.
485
+
486
+ --- Bio::FANTOM::MaXML::Sequences#id_strings
487
+
488
+ Shows ID list.
489
+ Returns an array of strings.
490
+
491
+
492
+ = Bio::FANTOM::MaXML::Sequence
493
+
494
+ This class is for 'allseq.sep.xml' and 'repseq.sep.xml' found at
495
+ ((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz>)) and
496
+ ((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz>)).
497
+ Note that this class is not suitable for 'allseq.xml' and 'repseq.xml'.
498
+
499
+ In addition, the instances of this class are automatically created
500
+ by Bio::FANTOM::MaXML::Sequences class.
501
+
502
+ --- Bio::FANTOM::MaXML::Sequence.new(str)
503
+
504
+ --- Bio::FANTOM::MaXML::Sequence#entry_id
505
+
506
+ --- Bio::FANTOM::MaXML::Sequence#altid(type_str = nil)
507
+
508
+ Returns hash of altid if no arguments are given.
509
+ Returns ID as a string if a type of ID (string) is given.
510
+
511
+ --- Bio::FANTOM::MaXML::Sequence#annotations
512
+
513
+ Gets lists of annotation data.
514
+ Returns a Bio::FANTOM::MaXML::Annotations object.
515
+
516
+ --- Bio::FANTOM::MaXML::Sequence#id_strings
517
+
518
+ Gets lists of ID. (same as altid.values)
519
+ Returns an array of strings.
520
+
521
+ --- Bio::FANTOM::MaXML::Sequence#library_id
522
+
523
+ Shows library ID. (same as cloneid[0,2])
524
+ Library IDs are listed at:
525
+ ((<URL:http://fantom2.gsc.riken.go.jp/fantom2/SI/sup01_est_3r_libraryinfo.pdf>))
526
+ ((<URL:http://fantom2.gsc.riken.go.jp/fantom2/SI/sup01_est_5f_libraryinfo.pdf>))
527
+
528
+ --- Bio::FANTOM::MaXML::Sequence#seqid
529
+
530
+ --- Bio::FANTOM::MaXML::Sequence#fantomid
531
+
532
+ --- Bio::FANTOM::MaXML::Sequence#cloneid
533
+
534
+ --- Bio::FANTOM::MaXML::Sequence#rearrayid
535
+
536
+ --- Bio::FANTOM::MaXML::Sequence#accession
537
+
538
+ --- Bio::FANTOM::MaXML::Sequence#annotator
539
+
540
+ --- Bio::FANTOM::MaXML::Sequence#version
541
+
542
+ --- Bio::FANTOM::MaXML::Sequence#modified_time
543
+
544
+ --- Bio::FANTOM::MaXML::Sequence#comment
545
+
546
+
547
+ = Bio::FANTOM::MaXML::Annotations
548
+
549
+ The instances of this class are automatically created
550
+ by Bio::FANTOM::MaXML::Sequence class.
551
+
552
+ --- Bio::FANTOM::MaXML::Annotations#to_a
553
+
554
+ Returns an Array of Bio::FANTOM::MaXML::Annotation objects.
555
+
556
+ --- Bio::FANTOM::MaXML::Annotations#each
557
+
558
+ --- Bio::FANTOM::MaXML::Annotations#get_all_by_qualifier(qstr)
559
+
560
+ --- Bio::FANTOM::MaXML::Annotations#get_by_qualifier(qstr)
561
+
562
+ --- Bio::FANTOM::MaXML::Annotations#[](x)
563
+
564
+ Same as to_a[x] when x is an integer.
565
+ Same as get_by_qualifier(x) when x is a string.
566
+
567
+ --- Bio::FANTOM::MaXML::Annotations#cds_start
568
+ --- Bio::FANTOM::MaXML::Annotations#cds_stop
569
+ --- Bio::FANTOM::MaXML::Annotations#gene_name
570
+ --- Bio::FANTOM::MaXML::Annotations#data_source
571
+ --- Bio::FANTOM::MaXML::Annotations#evidence
572
+
573
+
574
+ = Bio::FANTOM::MaXML::Annotation
575
+
576
+ The instances of this class are automatically created
577
+ by Bio::FANTOM::MaXML::Annotations class.
578
+
579
+ --- Bio::FANTOM::MaXML::Annotation#datasrc
580
+
581
+ Returns an Array of Bio::FANTOM::MaXML::Annotation::DataSrc objects.
582
+
583
+ --- Bio::FANTOM::MaXML::Annotation#qualifier
584
+
585
+ --- Bio::FANTOM::MaXML::Annotation#srckey
586
+
587
+ --- Bio::FANTOM::MaXML::Annotation#anntext
588
+
589
+ --- Bio::FANTOM::MaXML::Annotation#evidence
590
+
591
+ = Bio::FANTOM::MaXML::Annotation::DataSrc < String
592
+
593
+ The instances of this class are automatically created
594
+ by Bio::FANTOM::MaXML::Annotation class.
595
+
596
+ --- Bio::FANTOM::MaXML::Annotation::DataSrc#href
597
+
598
+ Shows a link URL to the database web page as a String.
599
+
600
+ = References
601
+
602
+ * ((<URL:http://fantom2.gsc.riken.go.jp/>))
603
+
604
+ =end