entrez 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +15 -0
- data/lib/entrez.rb +6 -24
- data/lib/entrez/options.rb +11 -0
- data/lib/entrez/query_limit.rb +35 -0
- data/lib/entrez/spec_helpers.rb +12 -0
- data/lib/entrez/version.rb +1 -1
- data/spec/entrez_spec.rb +0 -7
- data/spec/query_limit_spec.rb +21 -0
- data/spec/spec_helper.rb +10 -3
- data/spec/support/macros.rb +7 -2
- data/spec/support/matchers.rb +1 -1
- metadata +7 -2
data/README.rdoc
CHANGED
@@ -93,6 +93,21 @@ NCBI recommends no more than 3 URL requests per second: http://www.ncbi.nlm.nih.
|
|
93
93
|
This gem respects this limit. It will delay the next request if the last 3 have been made within 1 second.
|
94
94
|
The amount of delay time is no more than what is necessary to make the next request "respectful".
|
95
95
|
|
96
|
+
=== Ignore query limits for testing
|
97
|
+
|
98
|
+
If you use something like FakeWeb for testing, and you don't want to slow down your tests, tell Entrez to ignore the query limit:
|
99
|
+
|
100
|
+
require 'entrez/spec_helpers'
|
101
|
+
it 'does something that I promise will not bother NCBI' do
|
102
|
+
Entrez.ignore_query_limit do
|
103
|
+
# Anything that happens within this block will ignore the query limit.
|
104
|
+
# So make sure you do not actually request queries from NCBI.
|
105
|
+
# For example:
|
106
|
+
FakeWeb.allow_net_connect = false
|
107
|
+
end
|
108
|
+
# Query limits are respected again outside of the block.
|
109
|
+
end
|
110
|
+
|
96
111
|
== Compatibility
|
97
112
|
|
98
113
|
http://test.rubygems.org/gems/entrez
|
data/lib/entrez.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
require 'httparty'
|
2
2
|
require 'httparty/response_ext'
|
3
3
|
require 'query_string_normalizer'
|
4
|
+
require 'entrez/options'
|
5
|
+
require 'entrez/query_limit'
|
4
6
|
|
5
|
-
|
7
|
+
module Entrez
|
6
8
|
|
7
9
|
include HTTParty
|
8
10
|
base_uri 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils'
|
9
11
|
default_params tool: 'ruby', email: (ENV['ENTREZ_EMAIL'] || raise('please set ENTREZ_EMAIL environment variable'))
|
10
12
|
query_string_normalizer QueryStringNormalizer
|
11
13
|
|
14
|
+
extend QueryLimit
|
15
|
+
|
12
16
|
class << self
|
13
17
|
|
14
18
|
# E.g. Entrez.EFetch('snp', id: 123, retmode: :xml)
|
@@ -35,8 +39,7 @@ class Entrez
|
|
35
39
|
end
|
36
40
|
|
37
41
|
def perform(utility_path, db, params = {})
|
38
|
-
respect_query_limit
|
39
|
-
request_times << Time.now.to_f
|
42
|
+
respect_query_limit unless ignore_query_limit?
|
40
43
|
get utility_path, :query => {db: db}.merge(params)
|
41
44
|
end
|
42
45
|
|
@@ -55,27 +58,6 @@ class Entrez
|
|
55
58
|
str
|
56
59
|
end
|
57
60
|
|
58
|
-
private
|
59
|
-
|
60
|
-
# NCBI does not allow more than 3 requests per second.
|
61
|
-
# Unless 3 requests ago was more than 1 second ago,
|
62
|
-
# sleep for enough time to honor limit.
|
63
|
-
def respect_query_limit
|
64
|
-
three_requests_ago = request_times[-3]
|
65
|
-
return unless three_requests_ago
|
66
|
-
time_for_last_3_requeests = Time.now.to_f - three_requests_ago
|
67
|
-
enough_time_has_passed = time_for_last_3_requeests >= 1
|
68
|
-
unless enough_time_has_passed
|
69
|
-
sleep_time = 1 - time_for_last_3_requeests
|
70
|
-
STDERR.puts "sleeping #{sleep_time}"
|
71
|
-
sleep(sleep_time)
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
def request_times
|
76
|
-
@request_times ||= []
|
77
|
-
end
|
78
|
-
|
79
61
|
end
|
80
62
|
|
81
63
|
class UnknownOperator < StandardError
|
data/lib/entrez/query_limit.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Entrez
|
2
|
+
module QueryLimit
|
3
|
+
|
4
|
+
private
|
5
|
+
|
6
|
+
# NCBI does not allow more than 3 requests per second.
|
7
|
+
# If the last 3 requests happened within the last 1 second,
|
8
|
+
# sleep for enough time to let a full 1 second pass before the next request.
|
9
|
+
# Add current time to queue.
|
10
|
+
def respect_query_limit
|
11
|
+
now = Time.now.to_f
|
12
|
+
three_requests_ago = request_times[-3]
|
13
|
+
request_times << now
|
14
|
+
return unless three_requests_ago
|
15
|
+
time_for_last_3_requeests = now - three_requests_ago
|
16
|
+
enough_time_has_passed = time_for_last_3_requeests >= 1.0
|
17
|
+
unless enough_time_has_passed
|
18
|
+
sleep_time = 1 - time_for_last_3_requeests
|
19
|
+
STDERR.puts "sleeping #{sleep_time}"
|
20
|
+
sleep(sleep_time)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def request_times
|
25
|
+
@request_times ||= []
|
26
|
+
end
|
27
|
+
|
28
|
+
# Only way to set this should be through requiring entrez/spec_helpers and
|
29
|
+
# calling Entrez.ignore_query_limit(&block).
|
30
|
+
def ignore_query_limit?
|
31
|
+
!!@ignore_query_limit
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
data/lib/entrez/spec_helpers.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
Entrez.extend(Module.new do
|
2
|
+
|
3
|
+
# For the duration of the block, Entrez requests will ignore query limit.
|
4
|
+
# FOR TESTING ONLY WHERE NO REQUESTS ARE MADE TO NCBI.
|
5
|
+
# REQUESTS MUST BE STUBBED/FAKED.
|
6
|
+
def ignore_query_limit(&block)
|
7
|
+
@ignore_query_limit = true
|
8
|
+
block.call
|
9
|
+
@ignore_query_limit = false
|
10
|
+
end
|
11
|
+
|
12
|
+
end)
|
data/lib/entrez/version.rb
CHANGED
data/spec/entrez_spec.rb
CHANGED
@@ -41,13 +41,6 @@ describe Entrez do
|
|
41
41
|
|
42
42
|
end
|
43
43
|
|
44
|
-
it 'should respect query limit' do
|
45
|
-
fake_service :ESearch, 'esearch_empty.xml' do
|
46
|
-
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
47
|
-
requests.should take_longer_than(1.0)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
44
|
it 'should convert search term hash into query string with AND operator by default' do
|
52
45
|
query_string = {TITL: 'BRCA1', ORGN: 'human'}
|
53
46
|
Entrez.convert_search_term_hash(query_string).should == 'BRCA1[TITL]+AND+human[ORGN]'
|
data/spec/query_limit_spec.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Entrez#respect_query_limit' do
|
4
|
+
|
5
|
+
it 'should not perform more than 3 queries per second' do
|
6
|
+
fake_service :ESearch, 'esearch_empty.xml', ignore_query_limit: false do
|
7
|
+
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
8
|
+
requests.should take_longer_than(1.0)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'can be disabled for testing such as when FakeWeb is used' do
|
13
|
+
Entrez.ignore_query_limit do
|
14
|
+
fake_service :ESearch, 'esearch_empty.xml' do
|
15
|
+
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
16
|
+
requests.should_not take_longer_than(1.0)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,11 +2,18 @@ require 'awesome_print'
|
|
2
2
|
require 'fakeweb'
|
3
3
|
require 'pathname'
|
4
4
|
|
5
|
+
# Require the gem source.
|
5
6
|
require File.join(Pathname(__FILE__).dirname.expand_path, '../lib/entrez')
|
6
|
-
|
7
|
-
|
8
|
-
|
7
|
+
require 'entrez/spec_helpers'
|
8
|
+
|
9
|
+
# Require spec/support files.
|
9
10
|
Dir[File.expand_path("../support/**/*.rb", __FILE__)].each {|f| require f}
|
11
|
+
|
10
12
|
RSpec.configure do |config|
|
11
13
|
config.include(Macros)
|
12
14
|
end
|
15
|
+
|
16
|
+
# Default return mode to XML.
|
17
|
+
Entrez.default_params(
|
18
|
+
retmode: :xml,
|
19
|
+
)
|
data/spec/support/macros.rb
CHANGED
@@ -16,10 +16,15 @@ module Macros
|
|
16
16
|
# Since the generated URL is a bit difficult to capture,
|
17
17
|
# Faked uri will just match regular expression of service.
|
18
18
|
# When block ends, clean registry.
|
19
|
-
|
19
|
+
# Ignore Entrez query limit unless told not to.
|
20
|
+
def fake_service(service, fixture_file_name, options = {}, &block)
|
20
21
|
file_contents = fixture_file(fixture_file_name).read
|
21
22
|
FakeWeb.register_uri(:get, Regexp.new(service.to_s.downcase), body: file_contents, content_type: 'text/xml')
|
22
|
-
|
23
|
+
if options.fetch(:ignore_query_limit, true)
|
24
|
+
Entrez.ignore_query_limit(&block)
|
25
|
+
else
|
26
|
+
block.call
|
27
|
+
end
|
23
28
|
ensure
|
24
29
|
FakeWeb.clean_registry
|
25
30
|
end
|
data/spec/support/matchers.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
RSpec::Matchers.define :take_longer_than do |seconds|
|
2
2
|
match do |process|
|
3
3
|
@elapsed_time = timer(&process)
|
4
|
-
@elapsed_time
|
4
|
+
@elapsed_time > seconds
|
5
5
|
end
|
6
6
|
failure_message_for_should do
|
7
7
|
"Expected process to take longer than #{seconds} seconds (actual: #{@elapsed_time})"
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: entrez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.5.7
|
5
|
+
version: 0.5.8
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jared Ning
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-08-
|
13
|
+
date: 2011-08-17 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -76,11 +76,15 @@ files:
|
|
76
76
|
- Rakefile
|
77
77
|
- entrez.gemspec
|
78
78
|
- lib/entrez.rb
|
79
|
+
- lib/entrez/options.rb
|
80
|
+
- lib/entrez/query_limit.rb
|
81
|
+
- lib/entrez/spec_helpers.rb
|
79
82
|
- lib/entrez/version.rb
|
80
83
|
- lib/httparty/response/ids.rb
|
81
84
|
- lib/httparty/response_ext.rb
|
82
85
|
- lib/query_string_normalizer.rb
|
83
86
|
- spec/entrez_spec.rb
|
87
|
+
- spec/query_limit_spec.rb
|
84
88
|
- spec/query_string_normalizer_spec.rb
|
85
89
|
- spec/response_ids_spec.rb
|
86
90
|
- spec/spec_helper.rb
|
@@ -119,6 +123,7 @@ specification_version: 3
|
|
119
123
|
summary: HTTP requests to Entrez E-utilities
|
120
124
|
test_files:
|
121
125
|
- spec/entrez_spec.rb
|
126
|
+
- spec/query_limit_spec.rb
|
122
127
|
- spec/query_string_normalizer_spec.rb
|
123
128
|
- spec/response_ids_spec.rb
|
124
129
|
- spec/spec_helper.rb
|