bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,201 @@
1
+ #
2
+ # = bio/feature.rb - Features/Feature class (GenBank Feature table)
3
+ #
4
+ # Copyright:: Copyright (c) 2002, 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: LGPL
7
+ #
8
+ # $Id: feature.rb,v 1.9 2005/11/30 01:54:38 k Exp $
9
+ #
10
+ #--
11
+ # *TODO*
12
+ # add to_gff method
13
+ #++
14
+ #
15
+ # == INSD Feature table definition
16
+ #
17
+ # See http://www.ddbj.nig.ac.jp/FT/full_index.html for the INSD
18
+ # (GenBank/EMBL/DDBJ) Feature table definition.
19
+ #
20
+ # === Example
21
+ #
22
+ # # suppose features is a Bio::Features object
23
+ # features.each do |feature|
24
+ # f_name = feature.feature
25
+ # f_pos = feature.position
26
+ # puts "#{f_name}:\t#{f_pos}"
27
+ # feature.each do |qualifier|
28
+ # q_name = qualifier.qualifier
29
+ # q_val = qualifier.value
30
+ # puts "- #{q_name}:\t#{q_val}"
31
+ # end
32
+ # end
33
+ #
34
+ # # Iterates only on CDS features and extract translated amino acid sequences
35
+ # features.each("CDS") do |feature|
36
+ # hash = feature.assoc
37
+ # name = hash["gene"] || hash["product"] || hash["note"]
38
+ # seq = hash["translation"]
39
+ # pos = feature.position
40
+ # if name and seq
41
+ # puts ">#{name} #{feature.position}"
42
+ # puts seq
43
+ # end
44
+ # end
45
+ #
46
+ #--
47
+ #
48
+ # This library is free software; you can redistribute it and/or
49
+ # modify it under the terms of the GNU Lesser General Public
50
+ # License as published by the Free Software Foundation; either
51
+ # version 2 of the License, or (at your option) any later version.
52
+ #
53
+ # This library is distributed in the hope that it will be useful,
54
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
55
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
56
+ # Lesser General Public License for more details.
57
+ #
58
+ # You should have received a copy of the GNU Lesser General Public
59
+ # License along with this library; if not, write to the Free Software
60
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
61
+ #
62
+ #++
63
+ #
64
+
65
+ require 'bio/location'
66
+
67
+ module Bio
68
+
69
+ # Container for the sequence annotation: a feature key, a position string and a list of Qualifier objects.
70
+ class Feature
71
+
72
+ def initialize(feature = '', position = '', qualifiers = []) # all arguments are optional and stored as given
73
+ @feature, @position, @qualifiers = feature, position, qualifiers
74
+ end
75
+
76
+ # Returns type of feature in String (e.g. 'CDS', 'gene')
77
+ attr_accessor :feature
78
+
79
+ # Returns position of the feature in String (e.g. 'complement(123..146)')
80
+ attr_accessor :position
81
+
82
+ # Returns an Array of Qualifier objects.
83
+ attr_accessor :qualifiers
84
+
85
+ # Returns a new Bio::Locations object built from the position string on every call.
86
+ def locations
87
+ Locations.new(@position)
88
+ end
89
+
90
+ # Appends a Qualifier object to the Feature.
91
+ #
92
+ # * Always returns self (the Feature), so calls can be chained.
93
+ # * If the argument is not a Qualifier object, it is silently ignored.
94
+ #
95
+ def append(a)
96
+ @qualifiers.push(a) if a.is_a? Qualifier
97
+ return self
98
+ end
99
+
100
+ # Iterates on each qualifier; when arg is given, only qualifiers whose name equals arg are yielded.
101
+ def each(arg = nil)
102
+ @qualifiers.each do |x|
103
+ next if arg and x.qualifier != arg
104
+ yield x
105
+ end
106
+ end
107
+
108
+ # Deprecated (see the $DEBUG message). Returns a Hash of qualifier name => value; a repeated name keeps only its last value.
109
+ def assoc
110
+ STDERR.puts "Bio::Feature#assoc is deprecated, use Bio::Feature#to_hash instead" if $DEBUG
111
+ hash = Hash.new
112
+ @qualifiers.each do |x|
113
+ hash[x.qualifier] = x.value
114
+ end
115
+ return hash
116
+ end
117
+
118
+ # Returns a Hash of qualifier name => Array of values, so repeated qualifiers are all kept in order.
119
+ def to_hash
120
+ hash = Hash.new
121
+ @qualifiers.each do |x|
122
+ hash[x.qualifier] ||= []
123
+ hash[x.qualifier] << x.value
124
+ end
125
+ return hash
126
+ end
127
+
128
+ # Short cut for the Bio::Feature#to_hash[key]; rebuilds the Hash on each call and returns nil for an unknown key.
129
+ def [](key)
130
+ self.to_hash[key]
131
+ end
132
+
133
+ # Container for the qualifier-value pair (read-only after construction).
134
+ class Qualifier
135
+
136
+ def initialize(key, value)
137
+ @qualifier, @value = key, value
138
+ end
139
+
140
+ # Qualifier name in String
141
+ attr_reader :qualifier
142
+
143
+ # Qualifier value in String
144
+ attr_reader :value
145
+
146
+ end
147
+
148
+ end
149
+
150
+
151
+ # Container for the list of Feature objects.
152
+ class Features
153
+
154
+ def initialize(ary = []) # ary is stored as given, not copied
155
+ @features = ary
156
+ end
157
+
158
+ # Returns an Array of Feature objects.
159
+ attr_accessor :features
160
+
161
+ def to_gff
162
+ # *TODO* (not implemented: the body is empty, so this currently returns nil)
163
+ # to generate Bio::GFF object and implement Bio::GFF#to_s or
164
+ # to generate GFF string in this method?
165
+ end
166
+
167
+ # Appends a Feature object to Features; any other argument is ignored. Returns self.
168
+ def append(a)
169
+ @features.push(a) if a.is_a? Feature
170
+ return self
171
+ end
172
+
173
+ # Iterates on each feature. If a feature name is given as an argument,
174
+ # only iterates on the features whose name equals it (e.g. 'CDS' etc.)
175
+ def each(arg = nil)
176
+ @features.each do |x|
177
+ next if arg and x.feature != arg
178
+ yield x
179
+ end
180
+ end
181
+
182
+ # Short cut for the Features#features[n]; all arguments are forwarded to Array#[].
183
+ def [](*arg)
184
+ @features[*arg]
185
+ end
186
+
187
+ # Short cut for the Features#features.first
188
+ def first
189
+ @features.first
190
+ end
191
+
192
+ # Short cut for the Features#features.last
193
+ def last
194
+ @features.last
195
+ end
196
+
197
+ end
198
+
199
+ end # Bio
200
+
201
+
@@ -0,0 +1,103 @@
1
+ #
2
+ # bio/io/brdb.rb - BioRuby-DB access module
3
+ #
4
+ # Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: brdb.rb,v 1.4 2005/09/26 13:00:08 k Exp $
21
+ #
22
+
23
+ begin
24
+ require 'dbi'
25
+ rescue LoadError
26
+ end
27
+
28
+ module Bio
29
+
30
+ class BRDB # wrapper that opens a DBI connection for each fetch/insert/update call
31
+
32
+ def initialize(*args) # args are kept and later splatted into DBI.connect
33
+ @brdb = args
34
+ end
35
+
36
+ def fetch(db_table, entry_id) # selects all rows of db_table whose id equals entry_id
37
+ DBI.connect(*@brdb) do |dbh|
38
+ query = "select * from #{db_table} where id = ?" # NOTE(review): db_table is interpolated unescaped; only entry_id is bound
39
+ dbh.execute(query, entry_id).fetch_all
40
+ end
41
+ end
42
+
43
+ def insert(db_table, values) # values: an Array is quoted and comma-joined; anything else is bound as given
44
+ if values.is_a?(Array)
45
+ values = values.map{ |x| '"' + DBI.quote(x) + '"' }.join(",") # NOTE(review): assumes DBI.quote exists at module level - confirm
46
+ end
47
+ DBI.connect(*@brdb) do |dbh|
48
+ query = "insert into #{db_table} values (?);" # NOTE(review): the whole joined list is bound to one '?', presumably one value, not a column list - confirm
49
+ dbh.execute(query, values)
50
+ end
51
+ end
52
+
53
+ def update(db_table, entry_id, values) # values: a Hash is turned into "k='v',..." text; anything else is bound as given
54
+ if values.is_a?(Hash)
55
+ values = values.to_a.map{ |k, v| "#{k}='#{DBI.quote(v)}'" }.join(',')
56
+ end
57
+ DBI.connect(*@brdb) do |dbh|
58
+ query = "update #{db_table} set ? where id = ?" # NOTE(review): the assignment list is bound as a single value after 'set' - presumably not valid SQL; confirm
59
+ dbh.execute(query, values, entry_id)
60
+ end
61
+ end
62
+
63
+ def search(db_table, field, keyword) # not implemented: empty body, returns nil
64
+ end
65
+
66
+ end
67
+
68
+ end
69
+
70
+
71
+ if __FILE__ == $0 # demo that runs only when this file is executed directly
72
+ begin
73
+ require 'pp'
74
+ alias p pp
75
+ rescue LoadError # pp is optional; plain p is used when it cannot be loaded
76
+ end
77
+
78
+ db = 'dbi:Mysql:host=db.bioruby.org;database=genbank'
79
+ user = 'root' # no password argument is passed to Bio::BRDB.new below
80
+
81
+ serv = Bio::BRDB.new(db, user)
82
+
83
+ serv.fetch('ent', 'AA2CG').each do |row| # print every row of table 'ent' with id 'AA2CG'
84
+ p row.to_h
85
+ end
86
+ serv.fetch('ft', 'AA2CG').each do |row| # same lookup against table 'ft'
87
+ p row.to_h
88
+ end
89
+ end
90
+
91
+
92
+ =begin
93
+
94
+ = Bio::BRDB
95
+
96
+ --- Bio::BRDB.new(*args)
97
+
98
+ --- Bio::BRDB#close
99
+ --- Bio::BRDB#fetch(db_table, entry_id)
100
+ --- Bio::BRDB#update(db_table, entry_id, hash)
101
+ --- Bio::BRDB#insert(db_table, ary)
102
+
103
+ =end
data/lib/bio/io/das.rb ADDED
@@ -0,0 +1,471 @@
1
+ #
2
+ # = bio/io/das.rb - BioDAS access module
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2004
5
+ # Shuichi Kawashima <shuichi@hgc.jp>,
6
+ # Toshiaki Katayama <k@bioruby.org>
7
+ # License:: LGPL
8
+ #
9
+ # $Id: das.rb,v 1.11 2005/11/14 02:01:54 k Exp $
10
+ #
11
+ #--
12
+ # == TODO
13
+ #
14
+ # link, stylesheet
15
+ #
16
+ #++
17
+ #
18
+ #--
19
+ #
20
+ # This library is free software; you can redistribute it and/or
21
+ # modify it under the terms of the GNU Lesser General Public
22
+ # License as published by the Free Software Foundation; either
23
+ # version 2 of the License, or (at your option) any later version.
24
+ #
25
+ # This library is distributed in the hope that it will be useful,
26
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
27
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28
+ # Lesser General Public License for more details.
29
+ #
30
+ # You should have received a copy of the GNU Lesser General Public
31
+ # License along with this library; if not, write to the Free Software
32
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33
+ #
34
+ #++
35
+ #
36
+
37
+ begin
38
+ require 'rexml/document'
39
+ rescue LoadError
40
+ end
41
+ require 'uri'
42
+ require 'net/http'
43
+ require 'bio/sequence'
44
+
45
+
46
+ module Bio
47
+
48
+ class DAS
49
+
50
+ # Specify the DAS server to connect to; only the host, port and path of the URL are used.
51
+ def initialize(url = 'http://www.wormbase.org:80/db/')
52
+ schema, user, host, port, reg, path, = URI.split(url) # the other URI components are not used
53
+ @server = Net::HTTP.new(host, port)
54
+ @prefix = path ? path.chomp('/') : '' # path without a trailing '/', prepended to every request
55
+ end
56
+
57
+ # Returns an Array of Bio::DAS::DSN
58
+ def get_dsn
59
+ ary = []
60
+ result, = @server.get(@prefix + '/das/dsn')
61
+ doc = REXML::Document.new(result.body)
62
+ doc.elements.each('/descendant::DSN') do |e|
63
+ dsn = DSN.new
64
+ e.elements.each do |e|
65
+ case e.name
66
+ when 'SOURCE'
67
+ dsn.source = e.text
68
+ dsn.source_id = e.attributes['id']
69
+ dsn.source_version = e.attributes['version']
70
+ when 'MAPMASTER'
71
+ dsn.mapmaster = e.name
72
+ when 'DESCRIPTION'
73
+ dsn.description = e.text
74
+ dsn.description_href = e.attributes['href']
75
+ end
76
+ end
77
+ ary << dsn
78
+ end
79
+ ary
80
+ end
81
+
82
+ # Returns Bio::DAS::ENTRY_POINT.
83
+ # The 'dsn' can be a String or a Bio::DAS::DSN object.
84
+ def get_entry_point(dsn)
85
+ entry_point = ENTRY_POINT.new
86
+ dsn = dsn.source if dsn.instance_of?(Bio::DAS::DSN)
87
+ result, = @server.get(@prefix + '/das/' + dsn + '/entry_points')
88
+ doc = REXML::Document.new(result.body)
89
+ doc.elements.each('/descendant::ENTRY_POINTS') do |e|
90
+ entry_point.href = e.attributes['href']
91
+ entry_point.version = e.attributes['version']
92
+ e.elements.each do |e|
93
+ segment = SEGMENT.new
94
+ segment.entry_id = e.attributes['id']
95
+ segment.start = e.attributes['start']
96
+ segment.stop = e.attributes['stop']
97
+ segment.stop = e.attributes['orientation']
98
+ segment.subparts = e.attributes['subparts']
99
+ segment.description = e.text
100
+ entry_point.segments << segment
101
+ end
102
+ end
103
+ entry_point
104
+ end
105
+
106
+ # Returns an Array of Bio::DAS::DNA, one per SEQUENCE element of the '/das/<dsn>/dna' response.
107
+ # The 'dsn' can be a String or a Bio::DAS::DSN object.
108
+ # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
109
+ # Bio::DAS::SEGMENT
110
+ def get_dna(dsn, segments)
111
+ ary = []
112
+
113
+ dsn = dsn.source if dsn.instance_of?(DSN)
114
+ segments = [segments] if segments.instance_of?(SEGMENT)
115
+
116
+ opts = []
117
+ segments.each do |s|
118
+ opts << "segment=#{s.entry_id}:#{s.start},#{s.stop}"
119
+ end
120
+ query = opts.join(';') # one segment=id:start,stop term per segment, ';'-separated
121
+
122
+ result, = @server.get(@prefix + '/das/' + dsn + '/dna?' + query)
123
+ doc = REXML::Document.new(result.body)
124
+ doc.elements.each('/descendant::SEQUENCE') do |e|
125
+ sequence = DNA.new
126
+ sequence.entry_id = e.attributes['id']
127
+ sequence.start = e.attributes['start']
128
+ sequence.stop = e.attributes['stop']
129
+ sequence.version = e.attributes['version']
130
+ e.elements.each do |e| # if there are several child elements the last one wins
131
+ sequence.sequence = Bio::Sequence::NA.new(e.text)
132
+ sequence.length = e.attributes['length'].to_i
133
+ end
134
+ ary << sequence
135
+ end
136
+ ary
137
+ end
138
+
139
+ # Returns an Array of Bio::DAS::SEQUENCE, one per SEQUENCE element of the '/das/<dsn>/sequence' response.
140
+ # The 'dsn' can be a String or a Bio::DAS::DSN object.
141
+ # The 'segments' can be a Bio::DAS::SEGMENT object or an Array of
142
+ # Bio::DAS::SEGMENT
143
+ def get_sequence(dsn, segments)
144
+ ary = []
145
+
146
+ dsn = dsn.source if dsn.instance_of?(DSN)
147
+ segments = [segments] if segments.instance_of?(SEGMENT)
148
+
149
+ opts = []
150
+ segments.each do |s|
151
+ opts << "segment=#{s.entry_id}:#{s.start},#{s.stop}"
152
+ end
153
+ query = opts.join(';')
154
+
155
+ result, = @server.get(@prefix + '/das/' + dsn + '/sequence?' + query)
156
+ doc = REXML::Document.new(result.body)
157
+ doc.elements.each('/descendant::SEQUENCE') do |e|
158
+ sequence = SEQUENCE.new
159
+ sequence.entry_id = e.attributes['id']
160
+ sequence.start = e.attributes['start']
161
+ sequence.stop = e.attributes['stop']
162
+ sequence.moltype = e.attributes['moltype']
163
+ sequence.version = e.attributes['version']
164
+ case sequence.moltype # the molecule type decides which sequence class wraps the text
165
+ when /dna|rna/i # 'DNA', 'ssRNA', 'dsRNA'
166
+ sequence.sequence = Bio::Sequence::NA.new(e.text)
167
+ when /protein/i # 'Protein'
168
+ sequence.sequence = Bio::Sequence::AA.new(e.text)
169
+ else # unknown or missing moltype: keep the raw text
170
+ sequence.sequence = e.text
171
+ end
172
+ ary << sequence
173
+ end
174
+ ary
175
+ end
176
+
177
+ # Returns a Bio::DAS::TYPES object filled from the '/das/<dsn>/types' response.
178
+ # The 'dsn' can be a String or a Bio::DAS::DSN object.
179
+ # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
180
+ # an Array of Bio::DAS::SEGMENT
181
+ def get_types(dsn, segments = []) # argument 'type' is deprecated
182
+ types = TYPES.new
183
+
184
+ dsn = dsn.source if dsn.instance_of?(DSN)
185
+ segments = [segments] if segments.instance_of?(SEGMENT)
186
+
187
+ opts = []
188
+ segments.each do |s|
189
+ opts << "segment=#{s.entry_id}:#{s.start},#{s.stop}"
190
+ end
191
+ query = opts.join(';') # empty when no segments are given; the '?' is still appended below
192
+
193
+ result, = @server.get(@prefix + '/das/' + dsn + '/types?' + query)
194
+ doc = REXML::Document.new(result.body)
195
+ doc.elements.each('/descendant::GFF') do |e|
196
+ types.version = e.attributes['version']
197
+ types.href = e.attributes['href']
198
+ e.elements.each do |e| # each child of GFF is read as a segment
199
+ segment = SEGMENT.new
200
+ segment.entry_id = e.attributes['id']
201
+ segment.start = e.attributes['start']
202
+ segment.stop = e.attributes['stop']
203
+ segment.version = e.attributes['version']
204
+ segment.label = e.attributes['label']
205
+ e.elements.each do |e| # each child of the segment is read as a TYPE
206
+ t = TYPE.new
207
+ t.entry_id = e.attributes['id']
208
+ t.method = e.attributes['method']
209
+ t.category = e.attributes['category']
210
+ t.count = e.text.to_i # the element text is stored as an Integer count
211
+ segment.types << t
212
+ end
213
+ types.segments << segment
214
+ end
215
+ end
216
+ types
217
+ end
218
+
219
+ # Returns a Bio::DAS::GFF object.
220
+ # The 'dsn' can be a String or a Bio::DAS::DSN object.
221
+ # The 'segments' is optional and can be a Bio::DAS::SEGMENT object or
222
+ # an Array of Bio::DAS::SEGMENT
223
+ def get_features(dsn, segments = [], categorize = false, feature_ids = [], group_ids = [])
224
+ # arguments 'type' and 'category' are deprecated
225
+ gff = GFF.new
226
+
227
+ dsn = dsn.source if dsn.instance_of?(DSN)
228
+ segments = [segments] if segments.instance_of?(SEGMENT)
229
+
230
+ opts = []
231
+ segments.each do |s|
232
+ opts << "segment=#{s.entry_id}:#{s.start},#{s.stop}"
233
+ end
234
+ if categorize
235
+ opts << "categorize=yes" # default is 'no'
236
+ end
237
+ feature_ids.each do |fid|
238
+ opts << "feature_id=#{fid}"
239
+ end
240
+ group_ids.each do |gid|
241
+ opts << "group_id=#{gid}"
242
+ end
243
+ query = opts.join(';')
244
+
245
+ result, = @server.get(@prefix + '/das/' + dsn + '/features?' + query)
246
+ doc = REXML::Document.new(result.body)
247
+ doc.elements.each('/descendant::GFF') do |e|
248
+ gff.version = e.attributes['version']
249
+ gff.href = e.attributes['href']
250
+ e.elements.each('SEGMENT') do |e|
251
+ segment = SEGMENT.new
252
+ segment.entry_id = e.attributes['id']
253
+ segment.start = e.attributes['start']
254
+ segment.stop = e.attributes['stop']
255
+ segment.version = e.attributes['version']
256
+ segment.label = e.attributes['label']
257
+ e.elements.each do |e|
258
+ feature = FEATURE.new
259
+ feature.entry_id = e.attributes['id']
260
+ feature.label = e.attributes['label']
261
+ e.elements.each do |e|
262
+ case e.name
263
+ when 'TYPE'
264
+ type = TYPE.new
265
+ type.entry_id = e.attributes['id']
266
+ type.category = e.attributes['category']
267
+ type.reference = e.attributes['referrence']
268
+ type.label = e.text
269
+ feature.types << type
270
+ when 'METHOD'
271
+ feature.method_id = e.attributes['id']
272
+ feature.method = e.text
273
+ when 'START'
274
+ feature.start = e.text
275
+ when 'STOP', 'END'
276
+ feature.stop = e.text
277
+ when 'SCORE'
278
+ feature.score = e.text
279
+ when 'ORIENTATION'
280
+ feature.orientation = e.text
281
+ when 'PHASE'
282
+ feature.phase = e.text
283
+ when 'NOTE'
284
+ feature.notes << e.text
285
+ when 'LINK'
286
+ link = LINK.new
287
+ link.href = e.attributes['href']
288
+ link.text = e.text
289
+ feature.links << link
290
+ when 'TARGET'
291
+ target = TARGET.new
292
+ target.entry_id = e.attributes['id']
293
+ target.start = e.attributes['start']
294
+ target.stop = e.attributes['stop']
295
+ target.name = e.text
296
+ feature.targets << target
297
+ when 'GROUP'
298
+ group = GROUP.new
299
+ group.entry_id = e.attributes['id']
300
+ group.label = e.attributes['label']
301
+ group.type = e.attributes['type']
302
+ e.elements.each do |e|
303
+ case e.name
304
+ when 'NOTE' # in GROUP
305
+ group.notes << e.text
306
+ when 'LINK' # in GROUP
307
+ link = LINK.new
308
+ link.href = e.attributes['href']
309
+ link.text = e.text
310
+ group.links << link
311
+ when 'TARGET' # in GROUP
312
+ target = TARGET.new
313
+ target.entry_id = e.attributes['id']
314
+ target.start = e.attributes['start']
315
+ target.stop = e.attributes['stop']
316
+ target.name = e.text
317
+ group.targets << target
318
+ end
319
+ end
320
+ feature.groups << group
321
+ end
322
+ end
323
+ segment.features << feature
324
+ end
325
+ gff.segments << segment
326
+ end
327
+ end
328
+ gff
329
+ end
330
+
331
+
332
+ class DSN # plain record for one data source; fields are filled by DAS#get_dsn
333
+ attr_accessor :source, :source_id, :source_version,
334
+ :mapmaster, :description, :description_href
335
+ end
336
+
337
+ class ENTRY_POINT # holds href, version and the list of SEGMENT objects
338
+ def initialize
339
+ @segments = Array.new
340
+ end
341
+ attr_reader :segments
342
+ attr_accessor :href, :version
343
+
344
+ def each # yields each stored segment in order
345
+ @segments.each do |x|
346
+ yield x
347
+ end
348
+ end
349
+ end
350
+
351
+ class SEGMENT # a region of an entry; also carries FEATURE and TYPE lists
352
+ def self.region(entry_id, start, stop) # builds a SEGMENT with only id, start and stop set
353
+ segment = self.new
354
+ segment.entry_id = entry_id
355
+ segment.start = start
356
+ segment.stop = stop
357
+ return segment
358
+ end
359
+
360
+ def initialize
361
+ @features = Array.new # for FEATURE
362
+ @types = Array.new # for TYPE
363
+ end
364
+ attr_accessor :entry_id, :start, :stop, :orientation, :description,
365
+ :subparts, # optional
366
+ :features, :version, :label, # for FEATURE
367
+ :types # for TYPE
368
+ end
369
+
370
+ class DNA # plain record filled by DAS#get_dna
371
+ attr_accessor :entry_id, :start, :stop, :version, :sequence, :length
372
+ end
373
+
374
+ class SEQUENCE # plain record filled by DAS#get_sequence
375
+ attr_accessor :entry_id, :start, :stop, :moltype, :version, :sequence
376
+ end
377
+
378
+ class TYPES < ENTRY_POINT; end # same fields and #each as ENTRY_POINT; returned by DAS#get_types
379
+
380
+ class TYPE # NOTE: the :method accessor replaces Object#method on instances of this class
381
+ attr_accessor :entry_id, :method, :category, :count,
382
+ :reference, :label # for FEATURE
383
+ end
384
+
385
+ class GFF # result of DAS#get_features: version, href and a list of SEGMENT objects
386
+ def initialize
387
+ @segments = Array.new
388
+ end
389
+ attr_reader :segments
390
+ attr_accessor :version, :href
391
+ end
392
+
393
+ class FEATURE # one feature of a segment; the list fields start as empty Arrays
394
+ def initialize
395
+ @notes = Array.new
396
+ @links = Array.new
397
+ @types = Array.new
398
+ @targets = Array.new
399
+ @groups = Array.new
400
+ end
401
+ attr_accessor :entry_id, :label,
402
+ :method_id, :method, :start, :stop, :score, :orientation, :phase # NOTE: :method replaces Object#method on instances
403
+ attr_reader :notes, :links, :types, :targets, :groups
404
+ end
405
+
406
+ class LINK # href attribute plus element text of a LINK element
407
+ attr_accessor :href, :text
408
+ end
409
+
410
+ class TARGET # id, start and stop attributes plus element text (name) of a TARGET element
411
+ attr_accessor :entry_id, :start, :stop, :name
412
+ end
413
+
414
+ class GROUP # a GROUP element of a feature with its own notes, links and targets
415
+ def initialize
416
+ @notes = Array.new
417
+ @links = Array.new
418
+ @targets = Array.new
419
+ end
420
+ attr_accessor :entry_id, :label, :type
421
+ attr_reader :notes, :links, :targets
422
+ end
423
+
424
+ end
425
+
426
+ end # module Bio
427
+
428
+
429
+ if __FILE__ == $0 # demo that runs only when this file is executed directly; it queries two remote servers
430
+
431
+ # begin
432
+ # require 'pp'
433
+ # alias p pp
434
+ # rescue LoadError
435
+ # end
436
+
437
+ puts "### WormBase"
438
+ wormbase = Bio::DAS.new('http://www.wormbase.org/db/')
439
+
440
+ puts ">>> test get_dsn"
441
+ p wormbase.get_dsn
442
+
443
+ puts ">>> create segment obj Bio::DAS::SEGMENT.region('I', 1, 1000)"
444
+ seg = Bio::DAS::SEGMENT.region('I', 1, 1000)
445
+ p seg
446
+
447
+ puts ">>> test get_dna"
448
+ p wormbase.get_dna('elegans', seg)
449
+
450
+ puts "### test get_features"
451
+ p wormbase.get_features('elegans', seg)
452
+
453
+ puts "### KEGG DAS"
454
+ kegg_das = Bio::DAS.new("http://das.hgc.jp/cgi-bin/")
455
+
456
+ dsn_list = kegg_das.get_dsn
457
+ org_list = dsn_list.collect {|x| x.source}
458
+
459
+ puts ">>> dsn : entry_points"
460
+ org_list.each do |org|
461
+ print "#{org} : "
462
+ list = kegg_das.get_entry_point(org)
463
+ list.segments.each do |seg| # this block parameter reuses the name of the outer seg variable
464
+ print " #{seg.entry_id}"
465
+ end
466
+ puts
467
+ end
468
+
469
+ end
470
+
471
+