entrez 0.5.7 → 0.5.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +15 -0
- data/lib/entrez.rb +6 -24
- data/lib/entrez/options.rb +11 -0
- data/lib/entrez/query_limit.rb +35 -0
- data/lib/entrez/spec_helpers.rb +12 -0
- data/lib/entrez/version.rb +1 -1
- data/spec/entrez_spec.rb +0 -7
- data/spec/query_limit_spec.rb +21 -0
- data/spec/spec_helper.rb +10 -3
- data/spec/support/macros.rb +7 -2
- data/spec/support/matchers.rb +1 -1
- metadata +7 -2
data/README.rdoc
CHANGED
@@ -93,6 +93,21 @@ NCBI recommends no more than 3 URL requests per second: http://www.ncbi.nlm.nih.
|
|
93
93
|
This gem respects this limit. It will delay the next request if the last 3 have been made within 1 second.
|
94
94
|
The amount of delay time is no more than what is necessary to make the next request "respectful".
|
95
95
|
|
96
|
+
=== Ignore query limits for testing
|
97
|
+
|
98
|
+
If you use something like FakeWeb for testing, and you don't want to slow down your tests, tell Entrez to ignore the query limit:
|
99
|
+
|
100
|
+
require 'entrez/spec_helpers'
|
101
|
+
it 'does something that I promise will not bother NCBI' do
|
102
|
+
Entrez.ignore_query_limit do
|
103
|
+
# Anything that happens within this block will ignore the query limit.
|
104
|
+
# So make sure you do not actually request queries from NCBI.
|
105
|
+
# For example:
|
106
|
+
FakeWeb.allow_net_connect = false
|
107
|
+
end
|
108
|
+
# Query limits are respected again outside of the block.
|
109
|
+
end
|
110
|
+
|
96
111
|
== Compatibility
|
97
112
|
|
98
113
|
http://test.rubygems.org/gems/entrez
|
data/lib/entrez.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
require 'httparty'
|
2
2
|
require 'httparty/response_ext'
|
3
3
|
require 'query_string_normalizer'
|
4
|
+
require 'entrez/options'
|
5
|
+
require 'entrez/query_limit'
|
4
6
|
|
5
|
-
|
7
|
+
module Entrez
|
6
8
|
|
7
9
|
include HTTParty
|
8
10
|
base_uri 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils'
|
9
11
|
default_params tool: 'ruby', email: (ENV['ENTREZ_EMAIL'] || raise('please set ENTREZ_EMAIL environment variable'))
|
10
12
|
query_string_normalizer QueryStringNormalizer
|
11
13
|
|
14
|
+
extend QueryLimit
|
15
|
+
|
12
16
|
class << self
|
13
17
|
|
14
18
|
# E.g. Entrez.EFetch('snp', id: 123, retmode: :xml)
|
@@ -35,8 +39,7 @@ class Entrez
|
|
35
39
|
end
|
36
40
|
|
37
41
|
def perform(utility_path, db, params = {})
|
38
|
-
respect_query_limit
|
39
|
-
request_times << Time.now.to_f
|
42
|
+
respect_query_limit unless ignore_query_limit?
|
40
43
|
get utility_path, :query => {db: db}.merge(params)
|
41
44
|
end
|
42
45
|
|
@@ -55,27 +58,6 @@ class Entrez
|
|
55
58
|
str
|
56
59
|
end
|
57
60
|
|
58
|
-
private
|
59
|
-
|
60
|
-
# NCBI does not allow more than 3 requests per second.
|
61
|
-
# Unless 3 requests ago was more than 1 second ago,
|
62
|
-
# sleep for enough time to honor limit.
|
63
|
-
def respect_query_limit
|
64
|
-
three_requests_ago = request_times[-3]
|
65
|
-
return unless three_requests_ago
|
66
|
-
time_for_last_3_requeests = Time.now.to_f - three_requests_ago
|
67
|
-
enough_time_has_passed = time_for_last_3_requeests >= 1
|
68
|
-
unless enough_time_has_passed
|
69
|
-
sleep_time = 1 - time_for_last_3_requeests
|
70
|
-
STDERR.puts "sleeping #{sleep_time}"
|
71
|
-
sleep(sleep_time)
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
def request_times
|
76
|
-
@request_times ||= []
|
77
|
-
end
|
78
|
-
|
79
61
|
end
|
80
62
|
|
81
63
|
class UnknownOperator < StandardError
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Entrez
|
2
|
+
module QueryLimit
|
3
|
+
|
4
|
+
private
|
5
|
+
|
6
|
+
# NCBI does not allow more than 3 requests per second.
|
7
|
+
# If the last 3 requests happened within the last 1 second,
|
8
|
+
# sleep for enough time to let a full 1 second pass before the next request.
|
9
|
+
# Add current time to queue.
|
10
|
+
def respect_query_limit
|
11
|
+
now = Time.now.to_f
|
12
|
+
three_requests_ago = request_times[-3]
|
13
|
+
request_times << now
|
14
|
+
return unless three_requests_ago
|
15
|
+
time_for_last_3_requeests = now - three_requests_ago
|
16
|
+
enough_time_has_passed = time_for_last_3_requeests >= 1.0
|
17
|
+
unless enough_time_has_passed
|
18
|
+
sleep_time = 1 - time_for_last_3_requeests
|
19
|
+
STDERR.puts "sleeping #{sleep_time}"
|
20
|
+
sleep(sleep_time)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def request_times
|
25
|
+
@request_times ||= []
|
26
|
+
end
|
27
|
+
|
28
|
+
# Only way to set this should be through requiring entrez/spec_helpers and
|
29
|
+
# calling Entrez.ignore_query_limit(&block).
|
30
|
+
def ignore_query_limit?
|
31
|
+
!!@ignore_query_limit
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Entrez.extend(Module.new do
|
2
|
+
|
3
|
+
# For the duration of the block, Entrez requests will ignore query limit.
|
4
|
+
# FOR TESTING ONLY WHERE NO REQUESTS ARE MADE TO NCBI.
|
5
|
+
# REQUESTS MUST BE STUBBED/FAKED.
|
6
|
+
def ignore_query_limit(&block)
|
7
|
+
@ignore_query_limit = true
|
8
|
+
block.call
|
9
|
+
@ignore_query_limit = false
|
10
|
+
end
|
11
|
+
|
12
|
+
end)
|
data/lib/entrez/version.rb
CHANGED
data/spec/entrez_spec.rb
CHANGED
@@ -41,13 +41,6 @@ describe Entrez do
|
|
41
41
|
|
42
42
|
end
|
43
43
|
|
44
|
-
it 'should respect query limit' do
|
45
|
-
fake_service :ESearch, 'esearch_empty.xml' do
|
46
|
-
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
47
|
-
requests.should take_longer_than(1.0)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
44
|
it 'should convert search term hash into query string with AND operator by default' do
|
52
45
|
query_string = {TITL: 'BRCA1', ORGN: 'human'}
|
53
46
|
Entrez.convert_search_term_hash(query_string).should == 'BRCA1[TITL]+AND+human[ORGN]'
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Entrez#respect_query_limit' do
|
4
|
+
|
5
|
+
it 'should not perform more than 3 queries per second' do
|
6
|
+
fake_service :ESearch, 'esearch_empty.xml', ignore_query_limit: false do
|
7
|
+
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
8
|
+
requests.should take_longer_than(1.0)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'can be disabled for testing such as when FakeWeb is used' do
|
13
|
+
Entrez.ignore_query_limit do
|
14
|
+
fake_service :ESearch, 'esearch_empty.xml' do
|
15
|
+
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
16
|
+
requests.should_not take_longer_than(1.0)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,11 +2,18 @@ require 'awesome_print'
|
|
2
2
|
require 'fakeweb'
|
3
3
|
require 'pathname'
|
4
4
|
|
5
|
+
# Require the gem source.
|
5
6
|
require File.join(Pathname(__FILE__).dirname.expand_path, '../lib/entrez')
|
6
|
-
|
7
|
-
|
8
|
-
|
7
|
+
require 'entrez/spec_helpers'
|
8
|
+
|
9
|
+
# Require spec/support files.
|
9
10
|
Dir[File.expand_path("../support/**/*.rb", __FILE__)].each {|f| require f}
|
11
|
+
|
10
12
|
RSpec.configure do |config|
|
11
13
|
config.include(Macros)
|
12
14
|
end
|
15
|
+
|
16
|
+
# Default return mode to XML.
|
17
|
+
Entrez.default_params(
|
18
|
+
retmode: :xml,
|
19
|
+
)
|
data/spec/support/macros.rb
CHANGED
@@ -16,10 +16,15 @@ module Macros
|
|
16
16
|
# Since the generated URL is a bit difficult to capture,
|
17
17
|
# Faked uri will just match regular expression of service.
|
18
18
|
# When block ends, clean registry.
|
19
|
-
|
19
|
+
# Ignore Entrez query limit unless told not to.
|
20
|
+
def fake_service(service, fixture_file_name, options = {}, &block)
|
20
21
|
file_contents = fixture_file(fixture_file_name).read
|
21
22
|
FakeWeb.register_uri(:get, Regexp.new(service.to_s.downcase), body: file_contents, content_type: 'text/xml')
|
22
|
-
|
23
|
+
if options.fetch(:ignore_query_limit, true)
|
24
|
+
Entrez.ignore_query_limit(&block)
|
25
|
+
else
|
26
|
+
block.call
|
27
|
+
end
|
23
28
|
ensure
|
24
29
|
FakeWeb.clean_registry
|
25
30
|
end
|
data/spec/support/matchers.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
RSpec::Matchers.define :take_longer_than do |seconds|
|
2
2
|
match do |process|
|
3
3
|
@elapsed_time = timer(&process)
|
4
|
-
@elapsed_time
|
4
|
+
@elapsed_time > seconds
|
5
5
|
end
|
6
6
|
failure_message_for_should do
|
7
7
|
"Expected process to take longer than #{seconds} seconds (actual: #{@elapsed_time})"
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: entrez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.5.7
|
5
|
+
version: 0.5.8
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jared Ning
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-08-
|
13
|
+
date: 2011-08-17 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -76,11 +76,15 @@ files:
|
|
76
76
|
- Rakefile
|
77
77
|
- entrez.gemspec
|
78
78
|
- lib/entrez.rb
|
79
|
+
- lib/entrez/options.rb
|
80
|
+
- lib/entrez/query_limit.rb
|
81
|
+
- lib/entrez/spec_helpers.rb
|
79
82
|
- lib/entrez/version.rb
|
80
83
|
- lib/httparty/response/ids.rb
|
81
84
|
- lib/httparty/response_ext.rb
|
82
85
|
- lib/query_string_normalizer.rb
|
83
86
|
- spec/entrez_spec.rb
|
87
|
+
- spec/query_limit_spec.rb
|
84
88
|
- spec/query_string_normalizer_spec.rb
|
85
89
|
- spec/response_ids_spec.rb
|
86
90
|
- spec/spec_helper.rb
|
@@ -119,6 +123,7 @@ specification_version: 3
|
|
119
123
|
summary: HTTP requests to Entrez E-utilities
|
120
124
|
test_files:
|
121
125
|
- spec/entrez_spec.rb
|
126
|
+
- spec/query_limit_spec.rb
|
122
127
|
- spec/query_string_normalizer_spec.rb
|
123
128
|
- spec/response_ids_spec.rb
|
124
129
|
- spec/spec_helper.rb
|