bio 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +4 -3
- data/lib/bio.rb +3 -3
- data/lib/bio/appl/blast/format0.rb +3 -2
- data/lib/bio/appl/blast/format8.rb +5 -3
- data/lib/bio/db/kegg/compound.rb +6 -1
- data/lib/bio/db/kegg/enzyme.rb +3 -3
- data/lib/bio/db/kegg/genes.rb +2 -2
- data/lib/bio/db/kegg/glycan.rb +5 -5
- data/lib/bio/db/kegg/orthology.rb +27 -3
- data/lib/bio/db/newick.rb +203 -55
- data/lib/bio/io/flatfile.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +2 -2
- data/lib/bio/io/keggapi.rb +2 -1
- data/lib/bio/io/pubmed.rb +223 -81
- data/lib/bio/sequence/common.rb +6 -3
- data/lib/bio/shell/interface.rb +2 -2
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +5 -5
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +7 -8
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +1 -1
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +21 -17
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/spinner.gif +0 -0
- data/test/functional/bio/io/test_ensembl.rb +87 -4
- data/test/unit/bio/db/test_newick.rb +238 -1
- data/test/unit/bio/sequence/test_aa.rb +3 -2
- data/test/unit/bio/sequence/test_common.rb +11 -2
- data/test/unit/bio/sequence/test_na.rb +63 -1
- metadata +4 -4
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
data/lib/bio/io/flatfile.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
#
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: flatfile.rb,v 1.
|
8
|
+
# $Id: flatfile.rb,v 1.61 2007/11/15 07:07:16 k Exp $
|
9
9
|
#
|
10
10
|
#
|
11
11
|
# Bio::FlatFile is a helper and wrapper class to read a biological data file.
|
@@ -1130,7 +1130,7 @@ module Bio
|
|
1130
1130
|
genpept = RuleRegexp[ 'Bio::GenPept',
|
1131
1131
|
/^LOCUS .+ aa .+/ ],
|
1132
1132
|
medline = RuleRegexp[ 'Bio::MEDLINE',
|
1133
|
-
/^
|
1133
|
+
/^PMID\- [0-9]+$/ ],
|
1134
1134
|
embl = RuleRegexp[ 'Bio::EMBL',
|
1135
1135
|
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
|
1136
1136
|
sptr = RuleRegexp2[ 'Bio::SPTR',
|
@@ -4,7 +4,7 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2002 GOTO Naohisa <ng@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id: indexer.rb,v 1.
|
7
|
+
# $Id: indexer.rb,v 1.26 2007/12/11 15:13:32 ngoto Exp $
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/io/flatfile/index'
|
@@ -714,7 +714,7 @@ module Bio
|
|
714
714
|
|
715
715
|
##############################################################
|
716
716
|
def self.formatstring2class(format_string)
|
717
|
-
case
|
717
|
+
case format_string
|
718
718
|
when /genbank/i
|
719
719
|
dbclass = Bio::GenBank
|
720
720
|
when /genpept/i
|
data/lib/bio/io/keggapi.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2003, 2004 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id: keggapi.rb,v 1.
|
7
|
+
# $Id: keggapi.rb,v 1.15 2007/07/20 21:56:45 k Exp $
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/io/soapwsdl'
|
@@ -331,6 +331,7 @@ class API < Bio::SOAPWSDL
|
|
331
331
|
def add_filter(results)
|
332
332
|
if results.is_a?(Array)
|
333
333
|
results.each do |result|
|
334
|
+
next if result.is_a?(Fixnum)
|
334
335
|
def result.filter(fields)
|
335
336
|
fields.collect { |field| self.send(field) }
|
336
337
|
end
|
data/lib/bio/io/pubmed.rb
CHANGED
@@ -1,16 +1,15 @@
|
|
1
1
|
#
|
2
2
|
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
|
4
|
+
# Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: pubmed.rb,v 1.
|
8
|
+
# $Id: pubmed.rb,v 1.23 2007/12/12 13:53:26 k Exp $
|
9
9
|
#
|
10
10
|
|
11
|
-
require 'net/http'
|
12
|
-
require 'cgi' unless defined?(CGI)
|
13
11
|
require 'bio/command'
|
12
|
+
require 'cgi' unless defined?(CGI)
|
14
13
|
|
15
14
|
module Bio
|
16
15
|
|
@@ -18,18 +17,19 @@ module Bio
|
|
18
17
|
#
|
19
18
|
# The Bio::PubMed class provides several ways to retrieve bibliographic
|
20
19
|
# information from the PubMed database at
|
21
|
-
#
|
22
|
-
#
|
20
|
+
# http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed
|
21
|
+
#
|
22
|
+
# Basically, two types of queries are possible:
|
23
23
|
#
|
24
24
|
# * searching for PubMed IDs given a query string:
|
25
|
-
# * Bio::PubMed#
|
26
|
-
# * Bio::PubMed#
|
25
|
+
# * Bio::PubMed#esearch (recommended)
|
26
|
+
# * Bio::PubMed#search (only retrieves top 20 hits)
|
27
27
|
#
|
28
28
|
# * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
|
29
29
|
# given a PubMed ID
|
30
|
-
# * Bio::PubMed#
|
31
|
-
# * Bio::PubMed#
|
32
|
-
# * Bio::PubMed#
|
30
|
+
# * Bio::PubMed#efetch (recommended)
|
31
|
+
# * Bio::PubMed#query (unstable: depends on the HTML design of the NCBI site)
|
32
|
+
# * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI)
|
33
33
|
#
|
34
34
|
# The different methods within the same group are interchangeable and should
|
35
35
|
# return the same result.
|
@@ -37,54 +37,61 @@ module Bio
|
|
37
37
|
# Additional information about the MEDLINE format and PubMed programmable
|
38
38
|
# APIs can be found on the following websites:
|
39
39
|
#
|
40
|
-
# * Overview:
|
41
|
-
#
|
42
|
-
# *
|
43
|
-
#
|
44
|
-
# * Entrez utilities index:
|
45
|
-
#
|
46
|
-
# *
|
40
|
+
# * PubMed Overview:
|
41
|
+
# http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
|
42
|
+
# * PubMed help:
|
43
|
+
# http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
|
44
|
+
# * Entrez utilities index:
|
45
|
+
# http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
|
46
|
+
# * How to link:
|
47
|
+
# http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp
|
47
48
|
#
|
48
49
|
# == Usage
|
49
50
|
#
|
50
51
|
# require 'bio'
|
51
52
|
#
|
52
53
|
# # If you don't know the pubmed ID:
|
53
|
-
# Bio::PubMed.
|
54
|
+
# Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
54
55
|
# p x
|
55
56
|
# end
|
56
|
-
#
|
57
|
+
#
|
58
|
+
# Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
57
59
|
# p x
|
58
60
|
# end
|
59
61
|
#
|
60
62
|
# # To retrieve the MEDLINE entry for a given PubMed ID:
|
63
|
+
# puts Bio::PubMed.efetch(["10592173", "14693808"])
|
61
64
|
# puts Bio::PubMed.query("10592173")
|
62
65
|
# puts Bio::PubMed.pmfetch("10592173")
|
63
|
-
#
|
66
|
+
#
|
64
67
|
# # This can be converted into a Bio::MEDLINE object:
|
65
68
|
# manuscript = Bio::PubMed.query("10592173")
|
66
|
-
# medline = Bio::MEDLINE(manuscript)
|
69
|
+
# medline = Bio::MEDLINE.new(manuscript)
|
67
70
|
#
|
68
71
|
class PubMed
|
69
72
|
|
70
|
-
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
# *Arguments*:
|
74
|
-
# * _id_: query string (required)
|
75
|
-
# *Returns*:: array of PubMed IDs
|
76
|
-
def self.search(str)
|
77
|
-
host = "www.ncbi.nlm.nih.gov"
|
78
|
-
path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="
|
73
|
+
# Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
|
74
|
+
# weekdays for any series of more than 100 requests.
|
75
|
+
# -> Not implemented yet in BioRuby
|
79
76
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
77
|
+
# Make no more than one request every 3 seconds.
|
78
|
+
NCBI_INTERVAL = 3
|
79
|
+
@@last_access = nil
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def ncbi_access_wait(wait = NCBI_INTERVAL)
|
84
|
+
if @@last_access
|
85
|
+
duration = Time.now - @@last_access
|
86
|
+
if wait > duration
|
87
|
+
sleep wait - duration
|
88
|
+
end
|
89
|
+
end
|
90
|
+
@@last_access = Time.now
|
86
91
|
end
|
87
92
|
|
93
|
+
public
|
94
|
+
|
88
95
|
# Search the PubMed database by given keywords using E-Utils and returns
|
89
96
|
# an array of PubMed IDs.
|
90
97
|
#
|
@@ -102,22 +109,80 @@ class PubMed
|
|
102
109
|
# * _retmax_ (default 100)
|
103
110
|
# * _retmode_
|
104
111
|
# * _rettype_
|
105
|
-
# *Returns*:: array of PubMed IDs
|
106
|
-
def
|
107
|
-
|
112
|
+
# *Returns*:: array of PubMed IDs or a number of results
|
113
|
+
def esearch(str, hash = {})
|
114
|
+
return nil if str.empty?
|
108
115
|
|
109
|
-
|
110
|
-
|
111
|
-
|
116
|
+
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
117
|
+
opts = {
|
118
|
+
"retmax" => 100,
|
119
|
+
"tool" => "bioruby",
|
120
|
+
"db" => "pubmed",
|
121
|
+
"term" => str
|
122
|
+
}
|
123
|
+
opts.update(hash)
|
124
|
+
|
125
|
+
ncbi_access_wait
|
126
|
+
|
127
|
+
response, = Bio::Command.post_form(serv, opts)
|
128
|
+
result = response.body
|
129
|
+
if opts['rettype'] == 'count'
|
130
|
+
result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
|
131
|
+
else
|
132
|
+
result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
|
112
133
|
end
|
134
|
+
return result
|
135
|
+
end
|
136
|
+
|
137
|
+
# Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
|
138
|
+
# entrez efetch. Multiple PubMed IDs can be provided:
|
139
|
+
# Bio::PubMed.efetch(123)
|
140
|
+
# Bio::PubMed.efetch([123,456,789])
|
141
|
+
# ---
|
142
|
+
# *Arguments*:
|
143
|
+
# * _ids_: list of PubMed IDs (required)
|
144
|
+
# *Returns*:: Array of MEDLINE formatted String
|
145
|
+
def efetch(ids, hash = {})
|
146
|
+
return nil if ids.to_s.empty?
|
147
|
+
ids = ids.join(",") if ids === Array
|
148
|
+
|
149
|
+
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
150
|
+
opts = {
|
151
|
+
"tool" => "bioruby",
|
152
|
+
"db" => "pubmed",
|
153
|
+
"retmode" => "text",
|
154
|
+
"rettype" => "medline",
|
155
|
+
"id" => ids,
|
156
|
+
}
|
157
|
+
opts.update(hash)
|
158
|
+
|
159
|
+
ncbi_access_wait
|
160
|
+
|
161
|
+
response, = Bio::Command.post_form(serv, opts)
|
162
|
+
result = response.body
|
163
|
+
if opts["retmode"] == "text"
|
164
|
+
result = result.split(/\n\n+/)
|
165
|
+
end
|
166
|
+
return result
|
167
|
+
end
|
168
|
+
|
169
|
+
# Search the PubMed database by given keywords using entrez query and returns
|
170
|
+
# an array of PubMed IDs. Caution: this method returns the first 20 hits only.
|
171
|
+
# Instead, use of the 'esearch' method is strongly recommended.
|
172
|
+
# ---
|
173
|
+
# *Arguments*:
|
174
|
+
# * _str_: query string (required)
|
175
|
+
# *Returns*:: array of PubMed IDs
|
176
|
+
def search(str)
|
177
|
+
host = "www.ncbi.nlm.nih.gov"
|
178
|
+
path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
|
113
179
|
|
114
|
-
|
115
|
-
path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
|
180
|
+
ncbi_access_wait
|
116
181
|
|
117
182
|
http = Bio::Command.new_http(host)
|
118
183
|
response, = http.get(path + CGI.escape(str))
|
119
184
|
result = response.body
|
120
|
-
result = result.scan(
|
185
|
+
result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
|
121
186
|
return result
|
122
187
|
end
|
123
188
|
|
@@ -127,18 +192,27 @@ class PubMed
|
|
127
192
|
# *Arguments*:
|
128
193
|
# * _id_: PubMed ID (required)
|
129
194
|
# *Returns*:: MEDLINE formatted String
|
130
|
-
def
|
195
|
+
def query(*ids)
|
131
196
|
host = "www.ncbi.nlm.nih.gov"
|
132
|
-
path = "/entrez
|
197
|
+
path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
|
198
|
+
list = ids.join(",")
|
199
|
+
|
200
|
+
ncbi_access_wait
|
133
201
|
|
134
202
|
http = Bio::Command.new_http(host)
|
135
|
-
response, = http.get(path +
|
203
|
+
response, = http.get(path + list)
|
136
204
|
result = response.body
|
137
|
-
|
205
|
+
result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
|
206
|
+
|
207
|
+
if result =~ /id:.*Error occurred/
|
208
|
+
# id: xxxxx Error occurred: Article does not exist
|
138
209
|
raise( result )
|
139
210
|
else
|
140
|
-
|
141
|
-
|
211
|
+
if ids.size > 1
|
212
|
+
return result
|
213
|
+
else
|
214
|
+
return result.first
|
215
|
+
end
|
142
216
|
end
|
143
217
|
end
|
144
218
|
|
@@ -148,10 +222,12 @@ class PubMed
|
|
148
222
|
# *Arguments*:
|
149
223
|
# * _id_: PubMed ID (required)
|
150
224
|
# *Returns*:: MEDLINE formatted String
|
151
|
-
def
|
225
|
+
def pmfetch(id)
|
152
226
|
host = "www.ncbi.nlm.nih.gov"
|
153
227
|
path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
|
154
228
|
|
229
|
+
ncbi_access_wait
|
230
|
+
|
155
231
|
http = Bio::Command.new_http(host)
|
156
232
|
response, = http.get(path + id.to_s)
|
157
233
|
result = response.body
|
@@ -163,28 +239,24 @@ class PubMed
|
|
163
239
|
end
|
164
240
|
end
|
165
241
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
# * _ids_: list of PubMed IDs (required)
|
174
|
-
# *Returns*:: MEDLINE formatted String
|
175
|
-
def self.efetch(*ids)
|
176
|
-
return [] if ids.empty?
|
242
|
+
def self.esearch(*args)
|
243
|
+
self.new.esearch(*args)
|
244
|
+
end
|
245
|
+
|
246
|
+
def self.efetch(*args)
|
247
|
+
self.new.efetch(*args)
|
248
|
+
end
|
177
249
|
|
178
|
-
|
179
|
-
|
250
|
+
def self.search(*args)
|
251
|
+
self.new.search(*args)
|
252
|
+
end
|
180
253
|
|
181
|
-
|
254
|
+
def self.query(*args)
|
255
|
+
self.new.query(*args)
|
256
|
+
end
|
182
257
|
|
183
|
-
|
184
|
-
|
185
|
-
result = response.body
|
186
|
-
result = result.split(/\n\n+/)
|
187
|
-
return result
|
258
|
+
def self.pmfetch(*args)
|
259
|
+
self.new.pmfetch(*args)
|
188
260
|
end
|
189
261
|
|
190
262
|
end # PubMed
|
@@ -194,18 +266,88 @@ end # Bio
|
|
194
266
|
|
195
267
|
if __FILE__ == $0
|
196
268
|
|
197
|
-
puts
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
269
|
+
puts "=== instance methods ==="
|
270
|
+
|
271
|
+
pubmed = Bio::PubMed.new
|
272
|
+
|
273
|
+
puts "--- Search PubMed by E-Utils ---"
|
274
|
+
opts = {"rettype" => "count"}
|
275
|
+
puts Time.now
|
276
|
+
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
277
|
+
puts Time.now
|
278
|
+
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
279
|
+
puts Time.now
|
280
|
+
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
281
|
+
puts Time.now
|
282
|
+
pubmed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
283
|
+
puts x
|
284
|
+
end
|
285
|
+
|
286
|
+
puts "--- Retrieve PubMed entry by E-Utils ---"
|
287
|
+
puts Time.now
|
288
|
+
puts pubmed.efetch(16381885)
|
289
|
+
puts Time.now
|
290
|
+
puts pubmed.efetch("16381885")
|
291
|
+
puts Time.now
|
292
|
+
puts pubmed.efetch("16381885")
|
293
|
+
puts Time.now
|
294
|
+
opts = {"retmode" => "xml"}
|
295
|
+
puts pubmed.efetch([10592173, 14693808], opts)
|
296
|
+
puts Time.now
|
297
|
+
puts pubmed.efetch(["10592173", "14693808"], opts)
|
298
|
+
|
299
|
+
puts "--- Search PubMed by Entrez CGI ---"
|
300
|
+
pubmed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
202
301
|
p x
|
203
302
|
end
|
204
|
-
|
205
|
-
|
303
|
+
|
304
|
+
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
305
|
+
puts pubmed.query("16381885")
|
306
|
+
|
307
|
+
|
308
|
+
puts "--- Retrieve PubMed entry by PMfetch ---"
|
309
|
+
puts pubmed.pmfetch("16381885")
|
310
|
+
|
311
|
+
|
312
|
+
puts "=== class methods ==="
|
313
|
+
|
314
|
+
|
315
|
+
puts "--- Search PubMed by E-Utils ---"
|
316
|
+
opts = {"rettype" => "count"}
|
317
|
+
puts Time.now
|
318
|
+
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
319
|
+
puts Time.now
|
320
|
+
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
321
|
+
puts Time.now
|
322
|
+
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
323
|
+
puts Time.now
|
324
|
+
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
325
|
+
puts x
|
326
|
+
end
|
327
|
+
|
328
|
+
puts "--- Retrieve PubMed entry by E-Utils ---"
|
329
|
+
puts Time.now
|
330
|
+
puts Bio::PubMed.efetch(16381885)
|
331
|
+
puts Time.now
|
332
|
+
puts Bio::PubMed.efetch("16381885")
|
333
|
+
puts Time.now
|
334
|
+
puts Bio::PubMed.efetch("16381885")
|
335
|
+
puts Time.now
|
336
|
+
opts = {"retmode" => "xml"}
|
337
|
+
puts Bio::PubMed.efetch([10592173, 14693808], opts)
|
338
|
+
puts Time.now
|
339
|
+
puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
|
340
|
+
|
341
|
+
puts "--- Search PubMed by Entrez CGI ---"
|
342
|
+
Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
206
343
|
p x
|
207
344
|
end
|
208
|
-
|
209
|
-
puts
|
345
|
+
|
346
|
+
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
347
|
+
puts Bio::PubMed.query("16381885")
|
348
|
+
|
349
|
+
|
350
|
+
puts "--- Retrieve PubMed entry by PMfetch ---"
|
351
|
+
puts Bio::PubMed.pmfetch("16381885")
|
210
352
|
|
211
353
|
end
|