bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,201 @@
1
+ #
2
+ # = bio/feature.rb - Features/Feature class (GenBank Feature table)
3
+ #
4
+ # Copyright:: Copyright (c) 2002, 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: feature.rb,v 1.9 2005/11/30 01:54:38 k Exp $
9
+ #
10
+ #--
11
+ # *TODO*
12
+ # add to_gff method
13
+ #++
14
+ #
15
+ # == INSD Feature table definition
16
+ #
17
+ # See http://www.ddbj.nig.ac.jp/FT/full_index.html for the INSD
18
+ # (GenBank/EMBL/DDBJ) Feature table definition.
19
+ #
20
+ # === Example
21
+ #
22
+ # # suppose features is a Bio::Features object
23
+ # features.each do |feature|
24
+ # f_name = feature.feature
25
+ # f_pos = feature.position
26
+ # puts "#{f_name}:\t#{f_pos}"
27
+ # feature.each do |qualifier|
28
+ # q_name = qualifier.qualifier
29
+ # q_val = qualifier.value
30
+ # puts "- #{q_name}:\t#{q_val}"
31
+ # end
32
+ # end
33
+ #
34
+ # # Iterates only on CDS features and extract translated amino acid sequences
35
+ # features.each("CDS") do |feature|
36
+ # hash = feature.assoc
37
+ # name = hash["gene"] || hash["product"] || hash["note"]
38
+ # seq = hash["translation"]
39
+ # pos = feature.position
40
+ # if name and seq
41
+ # puts ">#{name} #{pos}"
42
+ # puts seq
43
+ # end
44
+ # end
45
+ #
46
+ #--
47
+ #
48
+ # This library is free software; you can redistribute it and/or
49
+ # modify it under the terms of the GNU Lesser General Public
50
+ # License as published by the Free Software Foundation; either
51
+ # version 2 of the License, or (at your option) any later version.
52
+ #
53
+ # This library is distributed in the hope that it will be useful,
54
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
55
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
56
+ # Lesser General Public License for more details.
57
+ #
58
+ # You should have received a copy of the GNU Lesser General Public
59
+ # License along with this library; if not, write to the Free Software
60
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
61
+ #
62
+ #++
63
+ #
64
+
65
+ require 'bio/location'
66
+
67
+ module Bio
68
+
69
# Container for the sequence annotation.
#
# A Feature keeps a feature key (e.g. 'CDS'), a position string and an
# Array of Feature::Qualifier objects.
class Feature

  # Creates a new Feature.
  #
  # * feature    - feature key (e.g. 'CDS', 'gene')
  # * position   - position string (e.g. 'complement(123..146)')
  # * qualifiers - Array of Qualifier objects
  def initialize(feature = '', position = '', qualifiers = [])
    @feature, @position, @qualifiers = feature, position, qualifiers
  end

  # Returns type of feature in String (e.g. 'CDS', 'gene')
  attr_accessor :feature

  # Returns position of the feature in String (e.g. 'complement(123..146)')
  attr_accessor :position

  # Returns an Array of Qualifier objects.
  attr_accessor :qualifiers

  # Returns a Locations object built from the position string.
  def locations
    Locations.new(@position)
  end

  # Appends a Qualifier object to the Feature.
  #
  # * Returns the Feature itself, so that calls can be chained.
  # * If the argument is not a Qualifier object, it is ignored
  #   (the Feature is still returned).
  #
  def append(a)
    @qualifiers.push(a) if a.is_a? Qualifier
    return self
  end

  # Iterates on each qualifier.  If a qualifier name is given as an
  # argument, only the qualifiers having that name are yielded.
  #
  # Without a block an enumerator over the same qualifiers is returned
  # instead of failing on the missing block.
  def each(arg = nil)
    return to_enum(:each, arg) unless block_given?
    @qualifiers.each do |x|
      next if arg and x.qualifier != arg
      yield x
    end
  end

  # Returns a Hash of qualifier name => value.  When a qualifier name
  # occurs more than once only the last value is kept; use to_hash to
  # get all of them.
  #
  # A deprecation notice is printed to STDERR when $DEBUG is set.
  def assoc
    STDERR.puts "Bio::Feature#assoc is deprecated, use Bio::Feature#to_hash instead" if $DEBUG
    hash = Hash.new
    @qualifiers.each do |x|
      hash[x.qualifier] = x.value
    end
    return hash
  end

  # Returns a Hash of qualifier name => Array of values, keeping every
  # value of repeated qualifiers in the order they were appended.
  def to_hash
    hash = Hash.new
    @qualifiers.each do |x|
      hash[x.qualifier] ||= []
      hash[x.qualifier] << x.value
    end
    return hash
  end

  # Short cut for the Bio::Feature#to_hash[key]
  # (returns an Array of values, or nil when the qualifier is absent).
  def [](key)
    self.to_hash[key]
  end

  # Container for the qualifier-value pair.
  class Qualifier

    # * key   - qualifier name
    # * value - qualifier value
    def initialize(key, value)
      @qualifier, @value = key, value
    end

    # Qualifier name in String
    attr_reader :qualifier

    # Qualifier value in String
    attr_reader :value

  end

end
149
+
150
+
151
# Container for the list of Feature objects.
class Features

  # * ary - Array of Feature objects to start with
  def initialize(ary = [])
    @features = ary
  end

  # Returns an Array of Feature objects.
  attr_accessor :features

  # Not implemented yet; currently always returns nil.
  def to_gff
    # *TODO*
    # to generate Bio::GFF object and implement Bio::GFF#to_s or
    # to generate GFF string in this method?
  end

  # Appends a Feature object to Features and returns the Features object
  # itself.  Arguments that are not Feature objects are ignored.
  def append(a)
    @features.push(a) if a.is_a? Feature
    return self
  end

  # Iterates on each feature.  If a feature name is given as an argument,
  # only iterates on the features having that name (e.g. 'CDS' etc.)
  #
  # Without a block an enumerator over the same features is returned
  # instead of failing on the missing block.
  def each(arg = nil)
    return to_enum(:each, arg) unless block_given?
    @features.each do |x|
      next if arg and x.feature != arg
      yield x
    end
  end

  # Short cut for the Features#features[n]
  # (all the argument forms of Array#[] are passed through).
  def [](*arg)
    @features[*arg]
  end

  # Short cut for the Features#features.first
  def first
    @features.first
  end

  # Short cut for the Features#features.last
  def last
    @features.last
  end

end
198
+
199
+ end # Bio
200
+
201
+
@@ -0,0 +1,103 @@
1
+ #
2
+ # bio/io/brdb.rb - BioRuby-DB access module
3
+ #
4
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: brdb.rb,v 1.4 2005/09/26 13:00:08 k Exp $
21
+ #
22
+
23
+ begin
24
+ require 'dbi'
25
+ rescue LoadError
26
+ end
27
+
28
+ module Bio
29
+
30
# Thin wrapper around DBI for reading and writing BioRuby-DB tables.
#
# Every call opens its own connection through DBI.connect using the
# arguments given to BRDB.new.
#
# NOTE: table names (and the column names passed to update) are
# interpolated into the SQL text because identifiers cannot be bound as
# parameters.  They must come from trusted code, never from user input.
class BRDB

  # * args - arguments handed to DBI.connect unchanged on every call
  #   (e.g. DSN string, user, password)
  def initialize(*args)
    @brdb = args
  end

  # Returns all the rows of db_table whose id column equals entry_id
  # (the result of fetch_all on the executed statement).
  def fetch(db_table, entry_id)
    DBI.connect(*@brdb) do |dbh|
      query = "select * from #{db_table} where id = ?"
      dbh.execute(query, entry_id).fetch_all
    end
  end

  # Inserts one row into db_table.
  #
  # * values - Array of column values in table order; a single
  #   non-Array value is inserted as a one column row.
  #
  # One placeholder is generated per value and the values are bound by
  # the driver.  (Joining the values into one string and binding it to a
  # single '?' would insert one column holding the joined text.)
  def insert(db_table, values)
    params = values.is_a?(Array) ? values : [values]
    placeholders = Array.new(params.size, '?').join(', ')
    DBI.connect(*@brdb) do |dbh|
      query = "insert into #{db_table} values (#{placeholders});"
      dbh.execute(query, *params)
    end
  end

  # Updates the row of db_table whose id column equals entry_id.
  #
  # * values - Hash of column name => new value.  Column names go into
  #   the SQL text, the new values and entry_id are bound as parameters.
  def update(db_table, entry_id, values)
    if values.is_a?(Hash)
      columns = values.keys
      assignments = columns.map { |c| "#{c} = ?" }.join(', ')
      query = "update #{db_table} set #{assignments} where id = ?"
      params = columns.map { |c| values[c] } + [entry_id]
    else
      # Legacy path kept unchanged for callers passing something else.
      # NOTE(review): a placeholder standing for a whole SET clause is
      # presumably rejected by the database -- confirm and deprecate.
      query = "update #{db_table} set ? where id = ?"
      params = [values, entry_id]
    end
    DBI.connect(*@brdb) do |dbh|
      dbh.execute(query, *params)
    end
  end

  # Not implemented yet; currently always returns nil.
  def search(db_table, field, keyword)
  end

end
67
+
68
+ end
69
+
70
+
71
if $0 == __FILE__
  # Use pp for output when it can be loaded; otherwise plain p is used.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end

  # Sample session: dump the rows stored for entry AA2CG in the entry
  # table and then in the feature table of the public GenBank database.
  serv = Bio::BRDB.new('dbi:Mysql:host=db.bioruby.org;database=genbank', 'root')

  ['ent', 'ft'].each do |table|
    serv.fetch(table, 'AA2CG').each do |row|
      p row.to_h
    end
  end
end
90
+
91
+
92
+ =begin
93
+
94
+ = Bio::BRDB
95
+
96
+ --- Bio::BRDB.new(*args)
97
+
98
+ --- Bio::BRDB#search(db_table, field, keyword)
99
+ --- Bio::BRDB#fetch(db_table, entry_id)
100
+ --- Bio::BRDB#update(db_table, entry_id, hash)
101
+ --- Bio::BRDB#insert(db_table, ary)
102
+
103
+ =end
data/lib/bio/io/das.rb ADDED
@@ -0,0 +1,471 @@
1
+ #
2
+ # = bio/io/das.rb - BioDAS access module
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2004
5
+ # Shuichi Kawashima <shuichi@hgc.jp>,
6
+ # Toshiaki Katayama <k@bioruby.org>
7
+ # License:: LGPL
8
+ #
9
+ # $Id: das.rb,v 1.11 2005/11/14 02:01:54 k Exp $
10
+ #
11
+ #--
12
+ # == TODO
13
+ #
14
+ # link, stylesheet
15
+ #
16
+ #++
17
+ #
18
+ #--
19
+ #
20
+ # This library is free software; you can redistribute it and/or
21
+ # modify it under the terms of the GNU Lesser General Public
22
+ # License as published by the Free Software Foundation; either
23
+ # version 2 of the License, or (at your option) any later version.
24
+ #
25
+ # This library is distributed in the hope that it will be useful,
26
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
27
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28
+ # Lesser General Public License for more details.
29
+ #
30
+ # You should have received a copy of the GNU Lesser General Public
31
+ # License along with this library; if not, write to the Free Software
32
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33
+ #
34
+ #++
35
+ #
36
+
37
+ begin
38
+ require 'rexml/document'
39
+ rescue LoadError
40
+ end
41
+ require 'uri'
42
+ require 'net/http'
43
+ require 'bio/sequence'
44
+
45
+
46
+ module Bio
47
+
48
# Client for servers speaking the BioDAS (Distributed Annotation System)
# protocol.  Each get_* method issues one HTTP GET request and converts
# the returned XML document into the plain container objects defined
# below (DSN, ENTRY_POINT, SEGMENT, DNA, SEQUENCE, TYPES, GFF, ...).
class DAS

  # Specify DAS server to connect
  #
  # * url - base URL of the server; its path is used as the prefix of
  #   every request ('/das/...' is appended to it).
  def initialize(url = 'http://www.wormbase.org:80/db/')
    # URI.split => [scheme, userinfo, host, port, registry, path, ...]
    host, port, path = URI.split(url).values_at(2, 3, 5)
    @server = Net::HTTP.new(host, port)
    @prefix = path ? path.chomp('/') : ''
  end

  # Returns an Array of Bio::DAS::DSN
  def get_dsn
    ary = []
    doc = das_document('/das/dsn')
    doc.elements.each('/descendant::DSN') do |dsn_elem|
      dsn = DSN.new
      dsn_elem.elements.each do |e|
        case e.name
        when 'SOURCE'
          dsn.source = e.text
          dsn.source_id = e.attributes['id']
          dsn.source_version = e.attributes['version']
        when 'MAPMASTER'
          # the map master URL is the element text (e.name is always
          # the literal tag name 'MAPMASTER')
          dsn.mapmaster = e.text
        when 'DESCRIPTION'
          dsn.description = e.text
          dsn.description_href = e.attributes['href']
        end
      end
      ary << dsn
    end
    ary
  end

  # Returns Bio::DAS::ENTRY_POINT.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  def get_entry_point(dsn)
    entry_point = ENTRY_POINT.new
    doc = das_document('/das/' + source_name(dsn) + '/entry_points')
    doc.elements.each('/descendant::ENTRY_POINTS') do |ep_elem|
      entry_point.href = ep_elem.attributes['href']
      entry_point.version = ep_elem.attributes['version']
      ep_elem.elements.each do |e|
        segment = SEGMENT.new
        segment.entry_id = e.attributes['id']
        segment.start = e.attributes['start']
        segment.stop = e.attributes['stop']
        segment.orientation = e.attributes['orientation']
        segment.subparts = e.attributes['subparts']
        segment.description = e.text
        entry_point.segments << segment
      end
    end
    entry_point
  end

  # Returns an Array of Bio::DAS::DNA.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
  # Bio::DAS::SEGMENT
  def get_dna(dsn, segments)
    ary = []
    query = segment_options(segments).join(';')
    doc = das_document('/das/' + source_name(dsn) + '/dna?' + query)
    doc.elements.each('/descendant::SEQUENCE') do |seq_elem|
      sequence = DNA.new
      sequence.entry_id = seq_elem.attributes['id']
      sequence.start = seq_elem.attributes['start']
      sequence.stop = seq_elem.attributes['stop']
      sequence.version = seq_elem.attributes['version']
      # the bases and their length live in the child element
      seq_elem.elements.each do |e|
        sequence.sequence = Bio::Sequence::NA.new(e.text)
        sequence.length = e.attributes['length'].to_i
      end
      ary << sequence
    end
    ary
  end

  # Returns an Array of Bio::DAS::SEQUENCE.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
  # Bio::DAS::SEGMENT
  def get_sequence(dsn, segments)
    ary = []
    query = segment_options(segments).join(';')
    doc = das_document('/das/' + source_name(dsn) + '/sequence?' + query)
    doc.elements.each('/descendant::SEQUENCE') do |e|
      sequence = SEQUENCE.new
      sequence.entry_id = e.attributes['id']
      sequence.start = e.attributes['start']
      sequence.stop = e.attributes['stop']
      sequence.moltype = e.attributes['moltype']
      sequence.version = e.attributes['version']
      case sequence.moltype
      when /dna|rna/i           # 'DNA', 'ssRNA', 'dsRNA'
        sequence.sequence = Bio::Sequence::NA.new(e.text)
      when /protein/i           # 'Protein'
        sequence.sequence = Bio::Sequence::AA.new(e.text)
      else                      # unknown or missing moltype: raw text
        sequence.sequence = e.text
      end
      ary << sequence
    end
    ary
  end

  # Returns a Bio::DAS::TYPES object.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
  # an Array of Bio::DAS::SEGMENT
  def get_types(dsn, segments = [])    # argument 'type' is deprecated
    types = TYPES.new
    query = segment_options(segments).join(';')
    doc = das_document('/das/' + source_name(dsn) + '/types?' + query)
    doc.elements.each('/descendant::GFF') do |gff_elem|
      types.version = gff_elem.attributes['version']
      types.href = gff_elem.attributes['href']
      gff_elem.elements.each do |seg_elem|
        segment = fill_segment(SEGMENT.new, seg_elem)
        seg_elem.elements.each do |e|
          t = TYPE.new
          t.entry_id = e.attributes['id']
          t.method = e.attributes['method']
          t.category = e.attributes['category']
          t.count = e.text.to_i
          segment.types << t
        end
        types.segments << segment
      end
    end
    types
  end

  # Returns a Bio::DAS::GFF object.
  # The 'dsn' can be a String or a Bio::DAS::DSN object.
  # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
  # an Array of Bio::DAS::SEGMENT
  #
  # * categorize  - adds 'categorize=yes' to the request when true
  # * feature_ids - Array of ids sent as 'feature_id=...' options
  # * group_ids   - Array of ids sent as 'group_id=...' options
  def get_features(dsn, segments = [], categorize = false, feature_ids = [], group_ids = [])
    # arguments 'type' and 'category' are deprecated
    gff = GFF.new

    opts = segment_options(segments)
    opts << "categorize=yes" if categorize      # default is 'no'
    feature_ids.each { |fid| opts << "feature_id=#{fid}" }
    group_ids.each { |gid| opts << "group_id=#{gid}" }
    query = opts.join(';')

    doc = das_document('/das/' + source_name(dsn) + '/features?' + query)
    doc.elements.each('/descendant::GFF') do |gff_elem|
      gff.version = gff_elem.attributes['version']
      gff.href = gff_elem.attributes['href']
      gff_elem.elements.each('SEGMENT') do |seg_elem|
        segment = fill_segment(SEGMENT.new, seg_elem)
        seg_elem.elements.each do |feature_elem|
          segment.features << parse_feature(feature_elem)
        end
        gff.segments << segment
      end
    end
    gff
  end


  # One data source as returned by get_dsn.
  class DSN
    attr_accessor :source, :source_id, :source_version,
      :mapmaster, :description, :description_href
  end

  # Result of get_entry_point: a list of SEGMENT objects plus the
  # href and version attributes of the response.
  class ENTRY_POINT
    def initialize
      @segments = []
    end
    attr_reader :segments
    attr_accessor :href, :version

    # Iterates on each segment.  Returns an enumerator when called
    # without a block.
    def each
      return to_enum(:each) unless block_given?
      @segments.each do |x|
        yield x
      end
    end
  end

  # A region of a reference sequence.  Used both to build requests
  # (see SEGMENT.region) and to hold parsed results.
  class SEGMENT
    # Creates a SEGMENT describing entry_id from start to stop.
    def self.region(entry_id, start, stop)
      segment = self.new
      segment.entry_id = entry_id
      segment.start = start
      segment.stop = stop
      return segment
    end

    def initialize
      @features = []            # for FEATURE
      @types = []               # for TYPE
    end
    attr_accessor :entry_id, :start, :stop, :orientation, :description,
      :subparts,                    # optional
      :features, :version, :label,  # for FEATURE
      :types                        # for TYPE
  end

  # One element of the get_dna result.
  class DNA
    attr_accessor :entry_id, :start, :stop, :version, :sequence, :length
  end

  # One element of the get_sequence result.
  class SEQUENCE
    attr_accessor :entry_id, :start, :stop, :moltype, :version, :sequence
  end

  # Result of get_types; same layout as ENTRY_POINT.
  class TYPES < ENTRY_POINT; end

  # A feature type.  Note that the 'method' accessor replaces
  # Object#method on instances of this class.
  class TYPE
    attr_accessor :entry_id, :method, :category, :count,
      :reference, :label        # for FEATURE
  end

  # Result of get_features: a list of SEGMENT objects holding FEATUREs.
  class GFF
    def initialize
      @segments = []
    end
    attr_reader :segments
    attr_accessor :version, :href
  end

  # One annotated feature.  Note that the 'method' accessor replaces
  # Object#method on instances of this class.
  class FEATURE
    def initialize
      @notes = []
      @links = []
      @types = []
      @targets = []
      @groups = []
    end
    attr_accessor :entry_id, :label,
      :method_id, :method, :start, :stop, :score, :orientation, :phase
    attr_reader :notes, :links, :types, :targets, :groups
  end

  # A hyper link attached to a FEATURE or a GROUP.
  class LINK
    attr_accessor :href, :text
  end

  # An alignment target attached to a FEATURE or a GROUP.
  class TARGET
    attr_accessor :entry_id, :start, :stop, :name
  end

  # A group a FEATURE belongs to.
  class GROUP
    def initialize
      @notes = []
      @links = []
      @targets = []
    end
    attr_accessor :entry_id, :label, :type
    attr_reader :notes, :links, :targets
  end


  private

  # Sends a GET request for the path below the server prefix and
  # returns the response body parsed as a REXML::Document.
  def das_document(path)
    # 'result, =' keeps only the first value in case get returns an Array
    result, = @server.get(@prefix + path)
    REXML::Document.new(result.body)
  end

  # Returns the data source name: the source of a DSN object, or the
  # argument itself otherwise.
  def source_name(dsn)
    dsn.is_a?(DSN) ? dsn.source : dsn
  end

  # Returns an Array of 'segment=ID:START,STOP' request options for a
  # single SEGMENT or an Array of SEGMENTs.
  def segment_options(segments)
    segments = [segments] if segments.is_a?(SEGMENT)
    segments.map { |s| "segment=#{s.entry_id}:#{s.start},#{s.stop}" }
  end

  # Copies the id, start, stop, version and label attributes of the XML
  # element into the segment and returns the segment.
  def fill_segment(segment, elem)
    segment.entry_id = elem.attributes['id']
    segment.start = elem.attributes['start']
    segment.stop = elem.attributes['stop']
    segment.version = elem.attributes['version']
    segment.label = elem.attributes['label']
    segment
  end

  # Builds a FEATURE from a child element of SEGMENT.
  def parse_feature(elem)
    feature = FEATURE.new
    feature.entry_id = elem.attributes['id']
    feature.label = elem.attributes['label']
    elem.elements.each do |e|
      case e.name
      when 'TYPE'
        type = TYPE.new
        type.entry_id = e.attributes['id']
        type.category = e.attributes['category']
        type.reference = e.attributes['reference']
        type.label = e.text
        feature.types << type
      when 'METHOD'
        feature.method_id = e.attributes['id']
        feature.method = e.text
      when 'START'
        feature.start = e.text
      when 'STOP', 'END'
        feature.stop = e.text
      when 'SCORE'
        feature.score = e.text
      when 'ORIENTATION'
        feature.orientation = e.text
      when 'PHASE'
        feature.phase = e.text
      when 'NOTE'
        feature.notes << e.text
      when 'LINK'
        feature.links << parse_link(e)
      when 'TARGET'
        feature.targets << parse_target(e)
      when 'GROUP'
        feature.groups << parse_group(e)
      end
    end
    feature
  end

  # Builds a GROUP (with its notes, links and targets) from a GROUP
  # element.
  def parse_group(elem)
    group = GROUP.new
    group.entry_id = elem.attributes['id']
    group.label = elem.attributes['label']
    group.type = elem.attributes['type']
    elem.elements.each do |e|
      case e.name
      when 'NOTE'
        group.notes << e.text
      when 'LINK'
        group.links << parse_link(e)
      when 'TARGET'
        group.targets << parse_target(e)
      end
    end
    group
  end

  # Builds a LINK from a LINK element.
  def parse_link(elem)
    link = LINK.new
    link.href = elem.attributes['href']
    link.text = elem.text
    link
  end

  # Builds a TARGET from a TARGET element.
  def parse_target(elem)
    target = TARGET.new
    target.entry_id = elem.attributes['id']
    target.start = elem.attributes['start']
    target.stop = elem.attributes['stop']
    target.name = elem.text
    target
  end

end
425
+
426
+ end # module Bio
427
+
428
+
429
if $0 == __FILE__

#  begin
#    require 'pp'
#    alias p pp
#  rescue LoadError
#  end

  # Sample session against the WormBase DAS server.
  puts "### WormBase"
  wormbase = Bio::DAS.new('http://www.wormbase.org/db/')

  puts ">>> test get_dsn"
  p wormbase.get_dsn

  puts ">>> create segment obj Bio::DAS::SEGMENT.region('I', 1, 1000)"
  seg = Bio::DAS::SEGMENT.region('I', 1, 1000)
  p seg

  puts ">>> test get_dna"
  p wormbase.get_dna('elegans', seg)

  puts "### test get_features"
  p wormbase.get_features('elegans', seg)

  # Sample session against the KEGG DAS server: list the entry points
  # of every data source it offers.
  puts "### KEGG DAS"
  kegg_das = Bio::DAS.new("http://das.hgc.jp/cgi-bin/")

  org_list = kegg_das.get_dsn.collect { |x| x.source }

  puts ">>> dsn : entry_points"
  org_list.each do |org|
    print "#{org} : "
    kegg_das.get_entry_point(org).segments.each do |s|
      print " #{s.entry_id}"
    end
    puts
  end

end
470
+
471
+