bio 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +4 -3
- data/lib/bio.rb +3 -3
- data/lib/bio/appl/blast/format0.rb +3 -2
- data/lib/bio/appl/blast/format8.rb +5 -3
- data/lib/bio/db/kegg/compound.rb +6 -1
- data/lib/bio/db/kegg/enzyme.rb +3 -3
- data/lib/bio/db/kegg/genes.rb +2 -2
- data/lib/bio/db/kegg/glycan.rb +5 -5
- data/lib/bio/db/kegg/orthology.rb +27 -3
- data/lib/bio/db/newick.rb +203 -55
- data/lib/bio/io/flatfile.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +2 -2
- data/lib/bio/io/keggapi.rb +2 -1
- data/lib/bio/io/pubmed.rb +223 -81
- data/lib/bio/sequence/common.rb +6 -3
- data/lib/bio/shell/interface.rb +2 -2
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +5 -5
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +7 -8
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +1 -1
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +21 -17
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/spinner.gif +0 -0
- data/test/functional/bio/io/test_ensembl.rb +87 -4
- data/test/unit/bio/db/test_newick.rb +238 -1
- data/test/unit/bio/sequence/test_aa.rb +3 -2
- data/test/unit/bio/sequence/test_common.rb +11 -2
- data/test/unit/bio/sequence/test_na.rb +63 -1
- metadata +4 -4
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
data/lib/bio/io/flatfile.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
#
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: flatfile.rb,v 1.
|
8
|
+
# $Id: flatfile.rb,v 1.61 2007/11/15 07:07:16 k Exp $
|
9
9
|
#
|
10
10
|
#
|
11
11
|
# Bio::FlatFile is a helper and wrapper class to read a biological data file.
|
@@ -1130,7 +1130,7 @@ module Bio
|
|
1130
1130
|
genpept = RuleRegexp[ 'Bio::GenPept',
|
1131
1131
|
/^LOCUS .+ aa .+/ ],
|
1132
1132
|
medline = RuleRegexp[ 'Bio::MEDLINE',
|
1133
|
-
/^
|
1133
|
+
/^PMID\- [0-9]+$/ ],
|
1134
1134
|
embl = RuleRegexp[ 'Bio::EMBL',
|
1135
1135
|
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
|
1136
1136
|
sptr = RuleRegexp2[ 'Bio::SPTR',
|
@@ -4,7 +4,7 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id: indexer.rb,v 1.
|
7
|
+
# $Id: indexer.rb,v 1.26 2007/12/11 15:13:32 ngoto Exp $
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/io/flatfile/index'
|
@@ -714,7 +714,7 @@ module Bio
|
|
714
714
|
|
715
715
|
##############################################################
|
716
716
|
def self.formatstring2class(format_string)
|
717
|
-
case
|
717
|
+
case format_string
|
718
718
|
when /genbank/i
|
719
719
|
dbclass = Bio::GenBank
|
720
720
|
when /genpept/i
|
data/lib/bio/io/keggapi.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2003, 2004 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id: keggapi.rb,v 1.
|
7
|
+
# $Id: keggapi.rb,v 1.15 2007/07/20 21:56:45 k Exp $
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/io/soapwsdl'
|
@@ -331,6 +331,7 @@ class API < Bio::SOAPWSDL
|
|
331
331
|
def add_filter(results)
|
332
332
|
if results.is_a?(Array)
|
333
333
|
results.each do |result|
|
334
|
+
next if result.is_a?(Fixnum)
|
334
335
|
def result.filter(fields)
|
335
336
|
fields.collect { |field| self.send(field) }
|
336
337
|
end
|
data/lib/bio/io/pubmed.rb
CHANGED
@@ -1,16 +1,15 @@
|
|
1
1
|
#
|
2
2
|
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
|
4
|
+
# Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: pubmed.rb,v 1.
|
8
|
+
# $Id: pubmed.rb,v 1.23 2007/12/12 13:53:26 k Exp $
|
9
9
|
#
|
10
10
|
|
11
|
-
require 'net/http'
|
12
|
-
require 'cgi' unless defined?(CGI)
|
13
11
|
require 'bio/command'
|
12
|
+
require 'cgi' unless defined?(CGI)
|
14
13
|
|
15
14
|
module Bio
|
16
15
|
|
@@ -18,18 +17,19 @@ module Bio
|
|
18
17
|
#
|
19
18
|
# The Bio::PubMed class provides several ways to retrieve bibliographic
|
20
19
|
# information from the PubMed database at
|
21
|
-
#
|
22
|
-
#
|
20
|
+
# http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed
|
21
|
+
#
|
22
|
+
# Basically, two types of queries are possible:
|
23
23
|
#
|
24
24
|
# * searching for PubMed IDs given a query string:
|
25
|
-
# * Bio::PubMed#
|
26
|
-
# * Bio::PubMed#
|
25
|
+
# * Bio::PubMed#esearch (recommended)
|
26
|
+
# * Bio::PubMed#search (only retrieves top 20 hits)
|
27
27
|
#
|
28
28
|
# * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
|
29
29
|
# given a PubMed ID
|
30
|
-
# * Bio::PubMed#
|
31
|
-
# * Bio::PubMed#
|
32
|
-
# * Bio::PubMed#
|
30
|
+
# * Bio::PubMed#efetch (recommended)
|
31
|
+
# * Bio::PubMed#query (unstable: breaks whenever the HTML design changes)
|
32
|
+
# * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI)
|
33
33
|
#
|
34
34
|
# The different methods within the same group are interchangeable and should
|
35
35
|
# return the same result.
|
@@ -37,54 +37,61 @@ module Bio
|
|
37
37
|
# Additional information about the MEDLINE format and PubMed programmable
|
38
38
|
# APIs can be found on the following websites:
|
39
39
|
#
|
40
|
-
# * Overview:
|
41
|
-
#
|
42
|
-
# *
|
43
|
-
#
|
44
|
-
# * Entrez utilities index:
|
45
|
-
#
|
46
|
-
# *
|
40
|
+
# * PubMed Overview:
|
41
|
+
# http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
|
42
|
+
# * PubMed help:
|
43
|
+
# http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
|
44
|
+
# * Entrez utilities index:
|
45
|
+
# http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
|
46
|
+
# * How to link:
|
47
|
+
# http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp
|
47
48
|
#
|
48
49
|
# == Usage
|
49
50
|
#
|
50
51
|
# require 'bio'
|
51
52
|
#
|
52
53
|
# # If you don't know the pubmed ID:
|
53
|
-
# Bio::PubMed.
|
54
|
+
# Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
54
55
|
# p x
|
55
56
|
# end
|
56
|
-
#
|
57
|
+
#
|
58
|
+
# Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
57
59
|
# p x
|
58
60
|
# end
|
59
61
|
#
|
60
62
|
# # To retrieve the MEDLINE entry for a given PubMed ID:
|
63
|
+
# puts Bio::PubMed.efetch("10592173", "14693808")
|
61
64
|
# puts Bio::PubMed.query("10592173")
|
62
65
|
# puts Bio::PubMed.pmfetch("10592173")
|
63
|
-
#
|
66
|
+
#
|
64
67
|
# # This can be converted into a Bio::MEDLINE object:
|
65
68
|
# manuscript = Bio::PubMed.query("10592173")
|
66
|
-
# medline = Bio::MEDLINE(manuscript)
|
69
|
+
# medline = Bio::MEDLINE.new(manuscript)
|
67
70
|
#
|
68
71
|
class PubMed
|
69
72
|
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
# *Arguments*:
|
74
|
-
# * _id_: query string (required)
|
75
|
-
# *Returns*:: array of PubMed IDs
|
76
|
-
def self.search(str)
|
77
|
-
host = "www.ncbi.nlm.nih.gov"
|
78
|
-
path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="
|
73
|
+
# Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
|
74
|
+
# weekdays for any series of more than 100 requests.
|
75
|
+
# -> Not implemented yet in BioRuby
|
79
76
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
77
|
+
# Make no more than one request every 3 seconds.
|
78
|
+
NCBI_INTERVAL = 3
|
79
|
+
@@last_access = nil
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def ncbi_access_wait(wait = NCBI_INTERVAL)
|
84
|
+
if @@last_access
|
85
|
+
duration = Time.now - @@last_access
|
86
|
+
if wait > duration
|
87
|
+
sleep wait - duration
|
88
|
+
end
|
89
|
+
end
|
90
|
+
@@last_access = Time.now
|
86
91
|
end
|
87
92
|
|
93
|
+
public
|
94
|
+
|
88
95
|
# Search the PubMed database by given keywords using E-Utils and returns
|
89
96
|
# an array of PubMed IDs.
|
90
97
|
#
|
@@ -102,22 +109,80 @@ class PubMed
|
|
102
109
|
# * _retmax_ (default 100)
|
103
110
|
# * _retmode_
|
104
111
|
# * _rettype_
|
105
|
-
# *Returns*:: array of PubMed IDs
|
106
|
-
def
|
107
|
-
|
112
|
+
# *Returns*:: array of PubMed IDs or a number of results
|
113
|
+
def esearch(str, hash = {})
|
114
|
+
return nil if str.empty?
|
108
115
|
|
109
|
-
|
110
|
-
|
111
|
-
|
116
|
+
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
117
|
+
opts = {
|
118
|
+
"retmax" => 100,
|
119
|
+
"tool" => "bioruby",
|
120
|
+
"db" => "pubmed",
|
121
|
+
"term" => str
|
122
|
+
}
|
123
|
+
opts.update(hash)
|
124
|
+
|
125
|
+
ncbi_access_wait
|
126
|
+
|
127
|
+
response, = Bio::Command.post_form(serv, opts)
|
128
|
+
result = response.body
|
129
|
+
if opts['rettype'] == 'count'
|
130
|
+
result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
|
131
|
+
else
|
132
|
+
result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
|
112
133
|
end
|
134
|
+
return result
|
135
|
+
end
|
136
|
+
|
137
|
+
# Retrieves a PubMed entry by PMID and returns a MEDLINE formatted string using
|
138
|
+
# entrez efetch. Multiple PubMed IDs can be provided:
|
139
|
+
# Bio::PubMed.efetch(123)
|
140
|
+
# Bio::PubMed.efetch([123,456,789])
|
141
|
+
# ---
|
142
|
+
# *Arguments*:
|
143
|
+
# * _ids_: list of PubMed IDs (required)
|
144
|
+
# *Returns*:: Array of MEDLINE formatted String
|
145
|
+
def efetch(ids, hash = {})
|
146
|
+
return nil if ids.to_s.empty?
|
147
|
+
ids = ids.join(",") if ids === Array
|
148
|
+
|
149
|
+
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
150
|
+
opts = {
|
151
|
+
"tool" => "bioruby",
|
152
|
+
"db" => "pubmed",
|
153
|
+
"retmode" => "text",
|
154
|
+
"rettype" => "medline",
|
155
|
+
"id" => ids,
|
156
|
+
}
|
157
|
+
opts.update(hash)
|
158
|
+
|
159
|
+
ncbi_access_wait
|
160
|
+
|
161
|
+
response, = Bio::Command.post_form(serv, opts)
|
162
|
+
result = response.body
|
163
|
+
if opts["retmode"] == "text"
|
164
|
+
result = result.split(/\n\n+/)
|
165
|
+
end
|
166
|
+
return result
|
167
|
+
end
|
168
|
+
|
169
|
+
# Search the PubMed database by given keywords using entrez query and returns
|
170
|
+
# an array of PubMed IDs. Caution: this method returns the first 20 hits only.
|
171
|
+
# Instead, use of the 'esearch' method is strongly recommended.
|
172
|
+
# ---
|
173
|
+
# *Arguments*:
|
174
|
+
# * _id_: query string (required)
|
175
|
+
# *Returns*:: array of PubMed IDs
|
176
|
+
def search(str)
|
177
|
+
host = "www.ncbi.nlm.nih.gov"
|
178
|
+
path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
|
113
179
|
|
114
|
-
|
115
|
-
path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
|
180
|
+
ncbi_access_wait
|
116
181
|
|
117
182
|
http = Bio::Command.new_http(host)
|
118
183
|
response, = http.get(path + CGI.escape(str))
|
119
184
|
result = response.body
|
120
|
-
result = result.scan(
|
185
|
+
result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
|
121
186
|
return result
|
122
187
|
end
|
123
188
|
|
@@ -127,18 +192,27 @@ class PubMed
|
|
127
192
|
# *Arguments*:
|
128
193
|
# * _id_: PubMed ID (required)
|
129
194
|
# *Returns*:: MEDLINE formatted String
|
130
|
-
def
|
195
|
+
def query(*ids)
|
131
196
|
host = "www.ncbi.nlm.nih.gov"
|
132
|
-
path = "/entrez
|
197
|
+
path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
|
198
|
+
list = ids.join(",")
|
199
|
+
|
200
|
+
ncbi_access_wait
|
133
201
|
|
134
202
|
http = Bio::Command.new_http(host)
|
135
|
-
response, = http.get(path +
|
203
|
+
response, = http.get(path + list)
|
136
204
|
result = response.body
|
137
|
-
|
205
|
+
result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
|
206
|
+
|
207
|
+
if result =~ /id:.*Error occurred/
|
208
|
+
# id: xxxxx Error occurred: Article does not exist
|
138
209
|
raise( result )
|
139
210
|
else
|
140
|
-
|
141
|
-
|
211
|
+
if ids.size > 1
|
212
|
+
return result
|
213
|
+
else
|
214
|
+
return result.first
|
215
|
+
end
|
142
216
|
end
|
143
217
|
end
|
144
218
|
|
@@ -148,10 +222,12 @@ class PubMed
|
|
148
222
|
# *Arguments*:
|
149
223
|
# * _id_: PubMed ID (required)
|
150
224
|
# *Returns*:: MEDLINE formatted String
|
151
|
-
def
|
225
|
+
def pmfetch(id)
|
152
226
|
host = "www.ncbi.nlm.nih.gov"
|
153
227
|
path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
|
154
228
|
|
229
|
+
ncbi_access_wait
|
230
|
+
|
155
231
|
http = Bio::Command.new_http(host)
|
156
232
|
response, = http.get(path + id.to_s)
|
157
233
|
result = response.body
|
@@ -163,28 +239,24 @@ class PubMed
|
|
163
239
|
end
|
164
240
|
end
|
165
241
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
# * _ids_: list of PubMed IDs (required)
|
174
|
-
# *Returns*:: MEDLINE formatted String
|
175
|
-
def self.efetch(*ids)
|
176
|
-
return [] if ids.empty?
|
242
|
+
def self.esearch(*args)
|
243
|
+
self.new.esearch(*args)
|
244
|
+
end
|
245
|
+
|
246
|
+
def self.efetch(*args)
|
247
|
+
self.new.efetch(*args)
|
248
|
+
end
|
177
249
|
|
178
|
-
|
179
|
-
|
250
|
+
def self.search(*args)
|
251
|
+
self.new.search(*args)
|
252
|
+
end
|
180
253
|
|
181
|
-
|
254
|
+
def self.query(*args)
|
255
|
+
self.new.query(*args)
|
256
|
+
end
|
182
257
|
|
183
|
-
|
184
|
-
|
185
|
-
result = response.body
|
186
|
-
result = result.split(/\n\n+/)
|
187
|
-
return result
|
258
|
+
def self.pmfetch(*args)
|
259
|
+
self.new.pmfetch(*args)
|
188
260
|
end
|
189
261
|
|
190
262
|
end # PubMed
|
@@ -194,18 +266,88 @@ end # Bio
|
|
194
266
|
|
195
267
|
if __FILE__ == $0
|
196
268
|
|
197
|
-
puts
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
269
|
+
puts "=== instance methods ==="
|
270
|
+
|
271
|
+
pubmed = Bio::PubMed.new
|
272
|
+
|
273
|
+
puts "--- Search PubMed by E-Utils ---"
|
274
|
+
opts = {"rettype" => "count"}
|
275
|
+
puts Time.now
|
276
|
+
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
277
|
+
puts Time.now
|
278
|
+
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
279
|
+
puts Time.now
|
280
|
+
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
281
|
+
puts Time.now
|
282
|
+
pubmed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
283
|
+
puts x
|
284
|
+
end
|
285
|
+
|
286
|
+
puts "--- Retrieve PubMed entry by E-Utils ---"
|
287
|
+
puts Time.now
|
288
|
+
puts pubmed.efetch(16381885)
|
289
|
+
puts Time.now
|
290
|
+
puts pubmed.efetch("16381885")
|
291
|
+
puts Time.now
|
292
|
+
puts pubmed.efetch("16381885")
|
293
|
+
puts Time.now
|
294
|
+
opts = {"retmode" => "xml"}
|
295
|
+
puts pubmed.efetch([10592173, 14693808], opts)
|
296
|
+
puts Time.now
|
297
|
+
puts pubmed.efetch(["10592173", "14693808"], opts)
|
298
|
+
|
299
|
+
puts "--- Search PubMed by Entrez CGI ---"
|
300
|
+
pubmed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
202
301
|
p x
|
203
302
|
end
|
204
|
-
|
205
|
-
|
303
|
+
|
304
|
+
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
305
|
+
puts pubmed.query("16381885")
|
306
|
+
|
307
|
+
|
308
|
+
puts "--- Retrieve PubMed entry by PMfetch ---"
|
309
|
+
puts pubmed.pmfetch("16381885")
|
310
|
+
|
311
|
+
|
312
|
+
puts "=== class methods ==="
|
313
|
+
|
314
|
+
|
315
|
+
puts "--- Search PubMed by E-Utils ---"
|
316
|
+
opts = {"rettype" => "count"}
|
317
|
+
puts Time.now
|
318
|
+
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
319
|
+
puts Time.now
|
320
|
+
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
321
|
+
puts Time.now
|
322
|
+
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
323
|
+
puts Time.now
|
324
|
+
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
325
|
+
puts x
|
326
|
+
end
|
327
|
+
|
328
|
+
puts "--- Retrieve PubMed entry by E-Utils ---"
|
329
|
+
puts Time.now
|
330
|
+
puts Bio::PubMed.efetch(16381885)
|
331
|
+
puts Time.now
|
332
|
+
puts Bio::PubMed.efetch("16381885")
|
333
|
+
puts Time.now
|
334
|
+
puts Bio::PubMed.efetch("16381885")
|
335
|
+
puts Time.now
|
336
|
+
opts = {"retmode" => "xml"}
|
337
|
+
puts Bio::PubMed.efetch([10592173, 14693808], opts)
|
338
|
+
puts Time.now
|
339
|
+
puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
|
340
|
+
|
341
|
+
puts "--- Search PubMed by Entrez CGI ---"
|
342
|
+
Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
206
343
|
p x
|
207
344
|
end
|
208
|
-
|
209
|
-
puts
|
345
|
+
|
346
|
+
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
347
|
+
puts Bio::PubMed.query("16381885")
|
348
|
+
|
349
|
+
|
350
|
+
puts "--- Retrieve PubMed entry by PMfetch ---"
|
351
|
+
puts Bio::PubMed.pmfetch("16381885")
|
210
352
|
|
211
353
|
end
|