bio 1.4.3.0001 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +39 -33
  3. data/BSDL +22 -0
  4. data/COPYING +2 -2
  5. data/COPYING.ja +36 -36
  6. data/ChangeLog +2404 -1025
  7. data/KNOWN_ISSUES.rdoc +15 -55
  8. data/README.rdoc +17 -23
  9. data/RELEASE_NOTES.rdoc +246 -183
  10. data/Rakefile +3 -2
  11. data/bin/br_biofetch.rb +29 -5
  12. data/bioruby.gemspec +15 -32
  13. data/bioruby.gemspec.erb +10 -20
  14. data/doc/ChangeLog-1.4.3 +1478 -0
  15. data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
  16. data/doc/Tutorial.rd +0 -6
  17. data/doc/Tutorial.rd.html +7 -12
  18. data/doc/Tutorial.rd.ja +960 -1064
  19. data/doc/Tutorial.rd.ja.html +977 -1067
  20. data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
  21. data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
  22. data/gemfiles/Gemfile.travis-rbx +13 -0
  23. data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
  24. data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
  25. data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
  26. data/lib/bio.rb +10 -43
  27. data/lib/bio/alignment.rb +8 -14
  28. data/lib/bio/appl/blast.rb +1 -2
  29. data/lib/bio/appl/blast/format0.rb +18 -7
  30. data/lib/bio/appl/blast/remote.rb +0 -9
  31. data/lib/bio/appl/blast/report.rb +1 -1
  32. data/lib/bio/appl/clustalw/report.rb +3 -1
  33. data/lib/bio/appl/genscan/report.rb +1 -2
  34. data/lib/bio/appl/iprscan/report.rb +1 -2
  35. data/lib/bio/appl/meme/mast.rb +4 -4
  36. data/lib/bio/appl/meme/mast/report.rb +1 -1
  37. data/lib/bio/appl/paml/codeml.rb +2 -2
  38. data/lib/bio/appl/paml/codeml/report.rb +1 -0
  39. data/lib/bio/appl/paml/common.rb +1 -1
  40. data/lib/bio/appl/sosui/report.rb +1 -2
  41. data/lib/bio/command.rb +62 -2
  42. data/lib/bio/data/aa.rb +13 -31
  43. data/lib/bio/data/codontable.rb +1 -2
  44. data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
  45. data/lib/bio/db/biosql/sequence.rb +1 -1
  46. data/lib/bio/db/embl/common.rb +1 -1
  47. data/lib/bio/db/embl/embl.rb +5 -4
  48. data/lib/bio/db/embl/format_embl.rb +3 -3
  49. data/lib/bio/db/embl/sptr.rb +9 -1444
  50. data/lib/bio/db/embl/swissprot.rb +12 -29
  51. data/lib/bio/db/embl/trembl.rb +13 -30
  52. data/lib/bio/db/embl/uniprot.rb +12 -29
  53. data/lib/bio/db/embl/uniprotkb.rb +1455 -0
  54. data/lib/bio/db/fasta.rb +17 -0
  55. data/lib/bio/db/fasta/defline.rb +1 -3
  56. data/lib/bio/db/fastq.rb +1 -1
  57. data/lib/bio/db/genbank/ddbj.rb +9 -5
  58. data/lib/bio/db/genbank/refseq.rb +11 -3
  59. data/lib/bio/db/gff.rb +3 -4
  60. data/lib/bio/db/go.rb +5 -6
  61. data/lib/bio/db/kegg/module.rb +4 -5
  62. data/lib/bio/db/kegg/pathway.rb +4 -5
  63. data/lib/bio/db/kegg/reaction.rb +1 -1
  64. data/lib/bio/db/nexus.rb +3 -2
  65. data/lib/bio/db/pdb/pdb.rb +2 -2
  66. data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
  67. data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
  68. data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
  69. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
  70. data/lib/bio/db/transfac.rb +1 -1
  71. data/lib/bio/io/das.rb +40 -41
  72. data/lib/bio/io/fastacmd.rb +0 -16
  73. data/lib/bio/io/fetch.rb +111 -55
  74. data/lib/bio/io/flatfile/buffer.rb +4 -5
  75. data/lib/bio/io/hinv.rb +2 -3
  76. data/lib/bio/io/ncbirest.rb +43 -6
  77. data/lib/bio/io/pubmed.rb +76 -81
  78. data/lib/bio/io/togows.rb +33 -10
  79. data/lib/bio/map.rb +1 -1
  80. data/lib/bio/pathway.rb +1 -1
  81. data/lib/bio/sequence/compat.rb +1 -1
  82. data/lib/bio/sequence/na.rb +63 -12
  83. data/lib/bio/shell.rb +0 -2
  84. data/lib/bio/shell/core.rb +5 -6
  85. data/lib/bio/shell/interface.rb +3 -4
  86. data/lib/bio/shell/irb.rb +1 -2
  87. data/lib/bio/shell/plugin/entry.rb +2 -3
  88. data/lib/bio/shell/plugin/seq.rb +7 -6
  89. data/lib/bio/shell/setup.rb +1 -2
  90. data/lib/bio/tree.rb +2 -2
  91. data/lib/bio/util/contingency_table.rb +0 -2
  92. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
  93. data/lib/bio/util/sirna.rb +76 -16
  94. data/lib/bio/version.rb +8 -9
  95. data/sample/benchmark_clustalw_report.rb +47 -0
  96. data/sample/biofetch.rb +248 -151
  97. data/setup.rb +6 -7
  98. data/test/data/clustalw/example1-seqnos.aln +58 -0
  99. data/test/network/bio/appl/blast/test_remote.rb +1 -15
  100. data/test/network/bio/appl/test_blast.rb +0 -12
  101. data/test/network/bio/io/test_pubmed.rb +49 -0
  102. data/test/network/bio/io/test_togows.rb +0 -1
  103. data/test/network/bio/test_command.rb +65 -2
  104. data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
  105. data/test/unit/bio/appl/blast/test_report.rb +110 -48
  106. data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
  107. data/test/unit/bio/appl/sim4/test_report.rb +46 -17
  108. data/test/unit/bio/appl/test_blast.rb +2 -2
  109. data/test/unit/bio/db/embl/test_embl.rb +0 -1
  110. data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
  111. data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
  112. data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
  113. data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
  114. data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
  115. data/test/unit/bio/db/test_fasta.rb +41 -1
  116. data/test/unit/bio/db/test_fastq.rb +14 -4
  117. data/test/unit/bio/db/test_gff.rb +2 -2
  118. data/test/unit/bio/db/test_phyloxml.rb +30 -30
  119. data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
  120. data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
  121. data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
  122. data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
  123. data/test/unit/bio/io/test_togows.rb +3 -2
  124. data/test/unit/bio/sequence/test_dblink.rb +1 -1
  125. data/test/unit/bio/sequence/test_na.rb +3 -1
  126. data/test/unit/bio/test_alignment.rb +1 -2
  127. data/test/unit/bio/test_command.rb +5 -4
  128. data/test/unit/bio/test_db.rb +4 -2
  129. data/test/unit/bio/test_pathway.rb +25 -10
  130. data/test/unit/bio/util/test_sirna.rb +22 -22
  131. metadata +656 -1430
  132. data/doc/KEGG_API.rd +0 -1843
  133. data/doc/KEGG_API.rd.ja +0 -1834
  134. data/extconf.rb +0 -2
  135. data/lib/bio/appl/blast/ddbj.rb +0 -131
  136. data/lib/bio/db/kegg/taxonomy.rb +0 -280
  137. data/lib/bio/io/dbget.rb +0 -194
  138. data/lib/bio/io/ddbjrest.rb +0 -344
  139. data/lib/bio/io/ddbjxml.rb +0 -458
  140. data/lib/bio/io/ebisoap.rb +0 -158
  141. data/lib/bio/io/ensembl.rb +0 -229
  142. data/lib/bio/io/higet.rb +0 -73
  143. data/lib/bio/io/keggapi.rb +0 -363
  144. data/lib/bio/io/ncbisoap.rb +0 -156
  145. data/lib/bio/io/soapwsdl.rb +0 -119
  146. data/lib/bio/shell/plugin/keggapi.rb +0 -181
  147. data/lib/bio/shell/plugin/soap.rb +0 -87
  148. data/sample/dbget +0 -37
  149. data/sample/demo_ddbjxml.rb +0 -212
  150. data/sample/demo_kegg_taxonomy.rb +0 -92
  151. data/sample/demo_keggapi.rb +0 -502
  152. data/sample/psortplot_html.rb +0 -214
  153. data/test/network/bio/io/test_ddbjrest.rb +0 -47
  154. data/test/network/bio/io/test_ensembl.rb +0 -230
  155. data/test/network/bio/io/test_soapwsdl.rb +0 -53
  156. data/test/unit/bio/io/test_ddbjxml.rb +0 -81
  157. data/test/unit/bio/io/test_ensembl.rb +0 -111
  158. data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/extconf.rb DELETED
@@ -1,2 +0,0 @@
1
- require 'mkmf'
2
- create_makefile("bioruby")
data/lib/bio/appl/blast/ddbj.rb DELETED
@@ -1,131 +0,0 @@
1
- #
2
- # = bio/appl/blast/ddbj.rb - Remote BLAST wrapper using DDBJ web service
3
- #
4
- # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
- # License:: The Ruby License
6
- #
7
-
8
- require 'bio/io/ddbjrest'
9
-
10
- module Bio::Blast::Remote
11
-
12
- # Remote BLAST factory using DDBJ Web API for Biology
13
- # (http://xml.nig.ac.jp/).
14
- #
15
- module DDBJ
16
-
17
- # Creates a remote BLAST factory using DDBJ.
18
- # Returns Bio::Blast object.
19
- #
20
- # Note for future improvement: In the future, it might return
21
- # Bio::Blast::Remote::DDBJ or other object.
22
- #
23
- def self.new(program, db, options = [])
24
- Bio::Blast.new(program, db, options, 'ddbj')
25
- end
26
-
27
- # Information about DDBJ BLAST.
28
- module Information
29
-
30
- include Bio::Blast::Remote::Information
31
-
32
- # (private) parse database information
33
- def _parse_databases
34
- if defined? @parse_databases
35
- return nil if @parse_databases
36
- end
37
- drv = Bio::DDBJ::REST::Blast.new
38
- str = drv.getSupportDatabaseList
39
-
40
- databases = {}
41
- dbdescs = {}
42
- keys = [ 'blastn', 'blastp' ]
43
- keys.each do |key|
44
- databases[key] ||= []
45
- dbdescs[key] ||= {}
46
- end
47
- prefix = ''
48
- prefix_count = 0
49
- str.each_line do |line|
50
- a = line.strip.split(/\s*\-\s*/, 2)
51
- case a.size
52
- when 1
53
- prefix = a[0].to_s.strip
54
- prefix += ': ' unless prefix.empty?
55
- prefix_count = 0
56
- next #each_line
57
- when 0
58
- prefix = '' if prefix_count > 0
59
- next #each_line
60
- end
61
- name = a[0].to_s.strip.freeze
62
- desc = a[1].to_s.strip
63
- key = case desc
64
- when /\(NT\)\s*$/
65
- 'blastn'
66
- when /\(AA\)\s*$/
67
- 'blastp'
68
- else
69
- warn "DDBJ BLAST: could not determine the database is NT or AA: #{line.chomp}" if $VERBOSE
70
- next #each_line
71
- end
72
- desc = (prefix + desc).freeze
73
- prefix_count += 1
74
- databases[key].push name
75
- dbdescs[key][name] = desc
76
- end
77
-
78
- databases['blastp'] ||= []
79
- dbdescs['blastp'] ||= []
80
-
81
- databases['blastn'].freeze
82
- databases['blastp'].freeze
83
-
84
- databases['blastx'] = databases['blastp']
85
- dbdescs['blastx'] = dbdescs['blastp']
86
- databases['tblastn'] = databases['blastn']
87
- dbdescs['tblastn'] = dbdescs['blastn']
88
- databases['tblastx'] = databases['blastn']
89
- dbdescs['tblastx'] = dbdescs['blastn']
90
-
91
- @databases = databases
92
- @database_descriptions = dbdescs
93
- @parse_databases = true
94
- true
95
- end
96
- private :_parse_databases
97
-
98
- end #module Information
99
-
100
- extend Information
101
-
102
- # executes BLAST and returns result as a string
103
- def exec_ddbj(query)
104
- options = make_command_line_options
105
- opt = Bio::Blast::NCBIOptions.new(options)
106
-
107
- # REST objects are cached
108
- @ddbj_remote_blast ||= Bio::DDBJ::REST::Blast.new
109
- @ddbj_request_manager ||= Bio::DDBJ::REST::RequestManager.new
110
-
111
- program = opt.delete('-p')
112
- db = opt.delete('-d')
113
- optstr = Bio::Command.make_command_line_unix(opt.options)
114
-
115
- # using searchParamAsync
116
- qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr)
117
- @output = qid
118
-
119
- result = @ddbj_request_manager.wait_getAsyncResult(qid)
120
-
121
- @output = result
122
- return @output
123
- end
124
-
125
- end #module DDBJ
126
-
127
- # for lazy load DDBJ module
128
- Ddbj = DDBJ
129
-
130
- end #module Bio::Blast::Remote
131
-
data/lib/bio/db/kegg/taxonomy.rb DELETED
@@ -1,280 +0,0 @@
1
- #
2
- # = bio/db/kegg/taxonomy.rb - KEGG taxonomy parser class
3
- #
4
- # Copyright:: Copyright (C) 2007 Toshiaki Katayama <k@bioruby.org>
5
- # License:: The Ruby License
6
- #
7
- # $Id:$
8
- #
9
-
10
- module Bio
11
- class KEGG
12
-
13
- # == Description
14
- #
15
- # Parse the KEGG 'taxonomy' file which describes taxonomic classification
16
- # of organisms.
17
- #
18
- # == References
19
- #
20
- # The KEGG 'taxonomy' file is available at
21
- #
22
- # * ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
23
- #
24
- class Taxonomy
25
-
26
- def initialize(filename, orgs = [])
27
- # Stores the taxonomic tree as a linked list (implemented in Hash), so
28
- # every node need to have unique name (key) to work correctly
29
- @tree = Hash.new
30
-
31
- # Also stores the taxonomic tree as a list of arrays (full path)
32
- @path = Array.new
33
-
34
- # Also stores all leaf nodes (organism codes) of every intermediate nodes
35
- @leaves = Hash.new
36
-
37
- # tentative name for the root node (use accessor to change)
38
- @root = 'Genes'
39
-
40
- hier = Array.new
41
- level = 0
42
- label = nil
43
-
44
- File.open(filename).each do |line|
45
- next if line.strip.empty?
46
-
47
- # line for taxonomic hierarchy (indent according to the number of # marks)
48
- if line[/^#/]
49
- level = line[/^#+/].length
50
- label = line[/[A-z].*/]
51
- hier[level] = sanitize(label)
52
-
53
- # line for organims name (unify different strains of a species)
54
- else
55
- tax, org, name, desc = line.chomp.split("\t")
56
- if orgs.nil? or orgs.empty? or orgs.include?(org)
57
- species, strain, = name.split('_')
58
- # (0) Grouping of the strains of the same species.
59
- # If the name of species is the same as the previous line,
60
- # add the species to the same species group.
61
- # ex. Gamma/enterobacteria has a large number of organisms,
62
- # so sub grouping of strains is needed for E.coli strains etc.
63
- #
64
- # However, if the species name is already used, need to avoid
65
- # collision of species name as the current implementation stores
66
- # the tree as a Hash, which may cause the infinite loop.
67
- #
68
- # (1) If species name == the intermediate node of other lineage
69
- # Add '_sp' to the species name to avoid the conflict (1-1), and if
70
- # 'species_sp' is already taken, use 'species_strain' instead (1-2).
71
- # ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
72
- # Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
73
- # -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
74
- # Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
75
- #
76
- # (2) If species name == the intermediate node of the same lineage
77
- # Add '_sp' to the species name to avoid the conflict.
78
- # ex. Bacteria/Cyanobacgteria/Cyanobacteria_CYA/cya
79
- # Bacteria/Cyanobacgteria/Cyanobacteria_CYB/cya
80
- # Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_MC1/mgm
81
- # -> Bacteria/Cyanobacgteria/Cyanobacteria_sp/cya
82
- # Bacteria/Cyanobacgteria/Cyanobacteria_sp/cya
83
- # Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_sp/mgm
84
- sp_group = "#{species}_sp"
85
- if @tree[species]
86
- if hier[level+1] == species
87
- # case (0)
88
- else
89
- # case (1-1)
90
- species = sp_group
91
- # case (1-2)
92
- if @tree[sp_group] and hier[level+1] != species
93
- species = name
94
- end
95
- end
96
- else
97
- if hier[level] == species
98
- # case (2)
99
- species = sp_group
100
- end
101
- end
102
- # 'hier' is an array of the taxonomic tree + species and strain name.
103
- # ex. [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] +
104
- # [S_cerevisiae, sce]
105
- hier[level+1] = species # sanitize(species)
106
- hier[level+2] = org
107
- ary = hier[1, level+2]
108
- warn ary.inspect if $DEBUG
109
- add_to_tree(ary)
110
- add_to_leaves(ary)
111
- add_to_path(ary)
112
- end
113
- end
114
- end
115
- return tree
116
- end
117
-
118
- attr_reader :tree
119
- attr_reader :path
120
- attr_reader :leaves
121
- attr_accessor :root
122
-
123
- def organisms(group)
124
- @leaves[group]
125
- end
126
-
127
- # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
128
- # and every intermediate nodes stores their child nodes as a Hash.
129
- def add_to_tree(ary)
130
- parent = @root
131
- ary.each do |node|
132
- @tree[parent] ||= Hash.new
133
- @tree[parent][node] = nil
134
- parent = node
135
- end
136
- end
137
-
138
- # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
139
- # and stores leaf nodes to the every intermediate nodes as an Array.
140
- def add_to_leaves(ary)
141
- leaf = ary.last
142
- ary.each do |node|
143
- @leaves[node] ||= Array.new
144
- @leaves[node] << leaf
145
- end
146
- end
147
-
148
- # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
149
- # and stores the path itself in an Array.
150
- def add_to_path(ary)
151
- @path << ary
152
- end
153
-
154
- # Compaction of intermediate nodes of the resulted taxonomic tree.
155
- # - If child node has only one child node (grandchild), make the child of
156
- # grandchild as a grandchild.
157
- # ex.
158
- # Plants / Monocotyledons / grass family / osa
159
- # --> Plants / Monocotyledons / osa
160
- #
161
- def compact(node = root)
162
- # if the node has children
163
- if subnodes = @tree[node]
164
- # obtain grandchildren for each child
165
- subnodes.keys.each do |subnode|
166
- if subsubnodes = @tree[subnode]
167
- # if the number of grandchild node is 1
168
- if subsubnodes.keys.size == 1
169
- # obtain the name of the grandchild node
170
- subsubnode = subsubnodes.keys.first
171
- # obtain the child of the grandchlid node
172
- if subsubsubnodes = @tree[subsubnode]
173
- # make the child of grandchild node as a chlid of child node
174
- @tree[subnode] = subsubsubnodes
175
- # delete grandchild node
176
- @tree[subnode].delete(subsubnode)
177
- warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
178
- # retry until new grandchild also needed to be compacted.
179
- retry
180
- end
181
- end
182
- end
183
- # repeat recurseively
184
- compact(subnode)
185
- end
186
- end
187
- end
188
-
189
- # Reduction of the leaf node of the resulted taxonomic tree.
190
- # - If the parent node have only one leaf node, replace parent node
191
- # with the leaf node.
192
- # ex.
193
- # Plants / Monocotyledons / osa
194
- # --> Plants / osa
195
- #
196
- def reduce(node = root)
197
- # if the node has children
198
- if subnodes = @tree[node]
199
- # obtain grandchildren for each child
200
- subnodes.keys.each do |subnode|
201
- if subsubnodes = @tree[subnode]
202
- # if the number of grandchild node is 1
203
- if subsubnodes.keys.size == 1
204
- # obtain the name of the grandchild node
205
- subsubnode = subsubnodes.keys.first
206
- # if the grandchild node is a leaf node
207
- unless @tree[subsubnode]
208
- # make the grandchild node as a child node
209
- @tree[node].update(subsubnodes)
210
- # delete child node
211
- @tree[node].delete(subnode)
212
- warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
213
- end
214
- end
215
- end
216
- # repeat recursively
217
- reduce(subnode)
218
- end
219
- end
220
- end
221
-
222
- # Traverse the taxonomic tree by the depth first search method
223
- # under the given (root or intermediate) node.
224
- def dfs(parent, &block)
225
- if children = @tree[parent]
226
- yield parent, children
227
- children.keys.each do |child|
228
- dfs(child, &block)
229
- end
230
- end
231
- end
232
-
233
- # Similar to the dfs method but also passes the current level of the nest
234
- # to the iterator.
235
- def dfs_with_level(parent, &block)
236
- @level ||= 0
237
- if children = @tree[parent]
238
- yield parent, children, @level
239
- @level += 1
240
- children.keys.each do |child|
241
- dfs_with_level(child, &block)
242
- end
243
- @level -= 1
244
- end
245
- end
246
-
247
- # Convert the taxonomic tree structure to a simple ascii art.
248
- def to_s
249
- result = "#{@root}\n"
250
- @tree[@root].keys.each do |node|
251
- result += ascii_tree(node, " ")
252
- end
253
- return result
254
- end
255
-
256
- private
257
-
258
- # Helper method for the to_s method.
259
- def ascii_tree(node, indent)
260
- result = "#{indent}+- #{node}\n"
261
- indent += " "
262
- @tree[node].keys.each do |child|
263
- if @tree[child]
264
- result += ascii_tree(child, indent)
265
- else
266
- result += "#{indent}+- #{child}\n"
267
- end
268
- end
269
- return result
270
- end
271
-
272
- def sanitize(str)
273
- str.gsub(/[^A-z0-9]/, '_')
274
- end
275
-
276
- end # Taxonomy
277
-
278
- end # KEGG
279
- end # Bio
280
-
data/lib/bio/io/dbget.rb DELETED
@@ -1,194 +0,0 @@
1
- #
2
- # = bio/io/dbget.rb - GenomeNet/DBGET client module
3
- #
4
- # Copyright:: Copyright (C) 2000, 2001
5
- # Mitsuteru C. Nakao <n@bioruby.org>,
6
- # Toshiaki Katayama <k@bioruby.org>
7
- # License:: The Ruby License
8
- #
9
- # $Id: dbget.rb,v 1.13 2007/04/05 23:35:41 trevor Exp $
10
- #
11
- # == DBGET
12
- #
13
- # Accessing the GenomeNet/DBGET data retrieval system
14
- # http://www.genome.jp/dbget/ within the intranet.
15
- #
16
-
17
- require 'socket'
18
-
19
- module Bio
20
-
21
- class DBGET
22
-
23
- # default DBGET server address
24
- # SERV = "dbgetserv.genome.jp"
25
- SERV = "dbget.genome.jp"
26
- # default DBGET port number
27
- PORT = "3266"
28
-
29
- # Main class method to access DBGET server. Optionally, this method
30
- # can be called with the alternative DBGET server address and the
31
- # TCP/IP port number.
32
- #
33
- # 'com' should be one of the following DBGET commands:
34
- #
35
- # * alink, bfind, bget, binfo, blink, bman, bref, btab, btit
36
- #
37
- # These methods are shortcut for the dbget commands. Actually,
38
- # Bio::DBGET.((|com|))(arg) internally calls Bio::DBGET.dbget(com, arg).
39
- # Most of these methods accept the argument "-h" for help.
40
- #
41
- # 'arg' should be one of the following formats :
42
- #
43
- # * [options] db
44
- # * specify the database name only for binfo, bman etc.
45
- # * [options] db:entry
46
- # * specify the database name and the entry name to retrieve.
47
- # * [options] db entry1 entry2 ...
48
- # * specify the database name and the list of entries to retrieve.
49
- #
50
- # Note that options in the above example can be omitted. If 'arg' is
51
- # empty, the help message with a list of options for 'com' will be
52
- # shown by default. Supported database names will be found at the
53
- # GenomeNet DBGET web page http://www.genome.jp/dbget/.
54
- #
55
- def DBGET.dbget(com, arg, serv = nil, port = nil)
56
-
57
- unless serv or port # if both of serv and port are nil
58
- if ENV["DBGET"] =~ /:/ # and ENV["DBGET"] exists
59
- serv, port = ENV["DBGET"].split(':')
60
- end
61
- end
62
- serv = serv ? serv : SERV
63
- port = port ? port : PORT
64
-
65
- if arg.empty?
66
- arg = "-h" # DBGET help message
67
- end
68
-
69
- query = "#{com} #{arg}\n" # DBGET query string
70
-
71
- sock = TCPSocket.open("#{serv}", "#{port}")
72
-
73
- sock.write(query) # submit query
74
- sock.flush # buffer flush
75
-
76
- sock.gets # skip "+Helo DBgetServ ..."
77
- sock.gets # skip "#If you see this message, ..."
78
- sock.gets # skip "*Request-IDent"
79
-
80
- result = sock.read # DBGET result
81
-
82
- sock.close
83
-
84
- return result
85
- end
86
-
87
- # Show the version information of the DBGET server.
88
- def DBGET.version
89
- dbget("bget", "-V")
90
- end
91
-
92
-
93
- #--
94
- # bacc("db entry") - not supported : get accession(s)
95
- # bent("db entry") - not supported : get entry name
96
- # lmarge("db entry") - not supported
97
- #++
98
-
99
- # alink("db entry") method returns relations
100
- def DBGET.alink(arg)
101
- dbget("alink", arg)
102
- end
103
-
104
- # bfind("db keyword") method searches entries by keyword
105
- def DBGET.bfind(arg)
106
- dbget("bfind", arg)
107
- end
108
-
109
- # bget("db entry") method retrieves entries specified by the entry names
110
- def DBGET.bget(arg)
111
- dbget("bget", arg)
112
- end
113
-
114
- # seq("db entry") method retrieves the first sequence of the entry
115
- #
116
- # Shortcut to retrieve the sequence of the entry in FASTA format.
117
- # This method is equivalent to Bio::DBGET.bget("-f -n 1 #{arg}") and
118
- # 'arg' should be the "db:entry" or "db entry1 entry2 ..." format.
119
- def DBGET.seq(arg)
120
- dbget("bget", "-f -n 1 #{arg}")
121
- end
122
-
123
- # seq2("db entry") method retrieves the second sequence of the entry if any
124
- #
125
- # Shortcut to retrieve the second sequence of the entry in FASTA format.
126
- # This method is equivalent to Bio::DBGET.bget("-f -n 2 #{arg}").
127
- # Only useful when treating the KEGG GENES database entries which have
128
- # both AASEQ and NTSEQ fields. This method is obsolete and it is
129
- # recommended to use 'naseq' and 'aaseq' instead.
130
- def DBGET.seq2(arg)
131
- dbget("bget", "-f -n 2 #{arg}")
132
- end
133
-
134
- # naseq("db entry") method retrieves the nucleic acid sequence of the
135
- # entry if any.
136
- def DBGET.naseq(arg)
137
- dbget("bget", "-f -n n #{arg}")
138
- end
139
-
140
- # aaseq("db entry") method retrieves the amino acid sequence of the
141
- # entry if any.
142
- def DBGET.aaseq(arg)
143
- dbget("bget", "-f -n a #{arg}")
144
- end
145
-
146
- # binfo("db") method retrieves the database information
147
- def DBGET.binfo(arg)
148
- dbget("binfo", arg)
149
- end
150
-
151
- # blink("db entry") method retrieves the link information
152
- def DBGET.blink(arg)
153
- dbget("blink", arg)
154
- end
155
-
156
- # bman ("db entry") method shows the manual page
157
- def DBGET.bman(arg)
158
- dbget("bman", arg)
159
- end
160
-
161
- # bref("db entry") method retrieves the references and authors
162
- def DBGET.bref(arg)
163
- dbget("bref", arg)
164
- end
165
-
166
- # btab ("db entry") method retrives (and generates) the database alias table
167
- def DBGET.btab(arg)
168
- dbget("btab", arg)
169
- end
170
-
171
- # btit("db entry ..") method retrieves the entry definition
172
- def DBGET.btit(arg)
173
- dbget("btit", arg)
174
- end
175
-
176
- end
177
-
178
- end # module Bio
179
-
180
-
181
- if __FILE__ == $0
182
- puts "### DBGET version"
183
- p Bio::DBGET.version
184
- puts "### DBGET.dbget('bfind', 'sce tyrosin kinase')"
185
- puts Bio::DBGET.dbget('bfind', 'sce tyrosin kinase')
186
- puts "### DBGET.bfind('sce tyrosin kinase')"
187
- puts Bio::DBGET.bfind('sce tyrosin kinase')
188
- puts "### DBGET.bget('sce:YDL028C')"
189
- puts Bio::DBGET.bget('sce:YDL028C')
190
- puts "### DBGET.binfo('dbget')"
191
- puts Bio::DBGET.binfo('dbget')
192
- end
193
-
194
-