entrez 0.5.6 → 0.5.7
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +1 -0
- data/README.rdoc +6 -1
- data/entrez.gemspec +5 -2
- data/lib/entrez.rb +9 -23
- data/lib/entrez/version.rb +1 -1
- data/lib/httparty/response/ids.rb +32 -0
- data/lib/httparty/response_ext.rb +22 -0
- data/spec/entrez_spec.rb +9 -23
- data/spec/response_ids_spec.rb +26 -0
- data/spec/spec_helper.rb +5 -3
- data/spec/support/fixtures/esearch_1.xml +5 -0
- data/spec/support/fixtures/esearch_1_2_3.xml +7 -0
- data/spec/support/fixtures/esearch_empty.xml +4 -0
- data/spec/support/macros.rb +14 -2
- metadata +38 -5
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/README.rdoc
CHANGED
@@ -54,9 +54,14 @@ args:
|
|
54
54
|
|
55
55
|
Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
56
56
|
#=> makes request to http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=genomeprj&term=hapmap[WORD]+AND+inprogress[SEQS]&retmode=xml.
|
57
|
-
#=> returns XML document with list of
|
57
|
+
#=> returns XML document with list of ids of genome projects that match the search term criteria.
|
58
58
|
#=> i.e. genome projects that have 'hapmap' in the description and whose sequencing status is 'inprogress'.
|
59
59
|
|
60
|
+
The response has a convenience method to retrieve the parsed ids.
|
61
|
+
|
62
|
+
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
63
|
+
response.ids #=> [1, 2, ...]
|
64
|
+
|
60
65
|
==== Customized Queries
|
61
66
|
|
62
67
|
You can build your own customized queries if you have something more complex with ANDs and ORs.
|
data/entrez.gemspec
CHANGED
@@ -19,7 +19,10 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.add_runtime_dependency 'httparty' # Written with version 0.7.
|
23
|
-
|
22
|
+
s.add_runtime_dependency 'httparty' # Written with version 0.7.8.
|
23
|
+
|
24
|
+
s.add_development_dependency 'awesome_print'
|
25
|
+
s.add_development_dependency 'fakeweb', '1.3.0'
|
26
|
+
s.add_development_dependency 'rspec', '2.6.0'
|
24
27
|
|
25
28
|
end
|
data/lib/entrez.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'httparty'
|
2
|
+
require 'httparty/response_ext'
|
2
3
|
require 'query_string_normalizer'
|
3
4
|
|
4
5
|
class Entrez
|
@@ -21,12 +22,10 @@ class Entrez
|
|
21
22
|
end
|
22
23
|
|
23
24
|
# E.g. Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
24
|
-
# returns response. For convenience, response.ids() returns array of ID integers from result set.
|
25
25
|
# search_terms can also be string literal.
|
26
26
|
def ESearch(db, search_terms = {}, params = {})
|
27
27
|
params[:term] = search_terms.is_a?(Hash) ? convert_search_term_hash(search_terms) : search_terms
|
28
28
|
response = perform '/esearch.fcgi', db, params
|
29
|
-
parse_ids_and_extend response if response[:retmode].nil? || response[:retmode] == :xml
|
30
29
|
response
|
31
30
|
end
|
32
31
|
|
@@ -58,15 +57,18 @@ class Entrez
|
|
58
57
|
|
59
58
|
private
|
60
59
|
|
60
|
+
# NCBI does not allow more than 3 requests per second.
|
61
|
+
# Unless 3 requests ago was more than 1 second ago,
|
62
|
+
# sleep for enough time to honor limit.
|
61
63
|
def respect_query_limit
|
62
64
|
three_requests_ago = request_times[-3]
|
63
65
|
return unless three_requests_ago
|
64
|
-
|
65
|
-
|
66
|
-
enough_time_has_passed = now > three_requests_ago + 1
|
66
|
+
time_for_last_3_requeests = Time.now.to_f - three_requests_ago
|
67
|
+
enough_time_has_passed = time_for_last_3_requeests >= 1
|
67
68
|
unless enough_time_has_passed
|
68
|
-
|
69
|
-
|
69
|
+
sleep_time = 1 - time_for_last_3_requeests
|
70
|
+
STDERR.puts "sleeping #{sleep_time}"
|
71
|
+
sleep(sleep_time)
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
@@ -74,22 +76,6 @@ class Entrez
|
|
74
76
|
@request_times ||= []
|
75
77
|
end
|
76
78
|
|
77
|
-
# Define ids() method which will parse and return the IDs from the XML response.
|
78
|
-
def parse_ids_and_extend(response)
|
79
|
-
response.instance_eval do
|
80
|
-
def ids
|
81
|
-
return @ids if @ids
|
82
|
-
id_content = self['eSearchResult']['IdList']['Id']
|
83
|
-
# If there is only 1, Crack will parse it and return just the string.
|
84
|
-
# Need to always return array.
|
85
|
-
id_content = [id_content].flatten
|
86
|
-
@ids = id_content.map(&:to_i)
|
87
|
-
rescue ::NoMethodError
|
88
|
-
@ids = []
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
79
|
end
|
94
80
|
|
95
81
|
class UnknownOperator < StandardError
|
data/lib/entrez/version.rb
CHANGED
@@ -0,0 +1,32 @@
|
|
1
|
+
module HTTParty
|
2
|
+
|
3
|
+
module ResponseIds
|
4
|
+
|
5
|
+
# For ESearch, add convenience method that parses ids and converts to array of integers.
|
6
|
+
# Only works if either no return mode specified or if it is :xml.
|
7
|
+
def ids
|
8
|
+
if parse_ids?
|
9
|
+
return @ids if @ids
|
10
|
+
id_list = parsed_response['eSearchResult']['IdList']
|
11
|
+
if id_list
|
12
|
+
id_content = id_list['Id']
|
13
|
+
id_content = [id_content].flatten
|
14
|
+
@ids = id_content.map(&:to_i)
|
15
|
+
else
|
16
|
+
@ids = []
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
# Parse only if this is an ESearch request and in xml format.
|
24
|
+
def parse_ids?
|
25
|
+
esearch_request? && (retmode.nil? || xml?)
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
HTTParty::Response.send :include, HTTParty::ResponseIds
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'httparty/response/ids'
|
2
|
+
|
3
|
+
module HTTParty
|
4
|
+
module ResponseExt
|
5
|
+
|
6
|
+
# Get the return mode from request.
|
7
|
+
def retmode
|
8
|
+
request.options[:query][:retmode]
|
9
|
+
end
|
10
|
+
|
11
|
+
def xml?
|
12
|
+
retmode == :xml
|
13
|
+
end
|
14
|
+
|
15
|
+
def esearch_request?
|
16
|
+
request.path.to_s.match /esearch/
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
HTTParty::Response.send :include, HTTParty::ResponseExt
|
data/spec/entrez_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Entrez do
|
|
8
8
|
end
|
9
9
|
|
10
10
|
it '#EFetch retrieves results' do
|
11
|
-
response = Entrez.EFetch('taxonomy', id: 9606
|
11
|
+
response = Entrez.EFetch('taxonomy', id: 9606)
|
12
12
|
response.body.should include('Homo sapiens')
|
13
13
|
end
|
14
14
|
|
@@ -18,39 +18,23 @@ describe Entrez do
|
|
18
18
|
end
|
19
19
|
|
20
20
|
it '#EInfo retrieves results' do
|
21
|
-
response = Entrez.EInfo('snp'
|
21
|
+
response = Entrez.EInfo('snp')
|
22
22
|
response.body.should include('<Name>RS</Name>')
|
23
23
|
end
|
24
24
|
|
25
25
|
context '#ESearch' do
|
26
26
|
|
27
27
|
it 'retrieves results' do
|
28
|
-
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}
|
28
|
+
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'})
|
29
29
|
response.body.should include('28911')
|
30
30
|
end
|
31
31
|
|
32
|
-
it 'response returns IDs for convenience' do
|
33
|
-
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
34
|
-
response.ids.should == [60153, 29429, 28911, 48101, 59851, 59849, 59847, 59845, 59839, 59835, 59833, 59831, 51895, 59829, 59827, 60835, 59811, 60831, 60819, 33895]
|
35
|
-
end
|
36
|
-
|
37
|
-
it 'returns empty array if nothing found' do
|
38
|
-
response = Entrez.ESearch('genomeprj', {NON_EXISTENT_SEARCH_FIELD: 'does not exist even in oompaloompa land'}, retmode: :xml)
|
39
|
-
response.ids.should be_empty
|
40
|
-
end
|
41
|
-
|
42
|
-
it 'returns array even if only 1 id found' do
|
43
|
-
id = 60153
|
44
|
-
response = Entrez.ESearch('genomeprj', {uid: id}, retmode: :xml)
|
45
|
-
response.ids.should == [id]
|
46
|
-
end
|
47
|
-
|
48
32
|
it 'accepts string as search_terms parameter' do
|
49
|
-
response = Entrez.ESearch('genomeprj', 'hapmap[WORD]'
|
33
|
+
response = Entrez.ESearch('genomeprj', 'hapmap[WORD]')
|
50
34
|
response.ids.should include(60153)
|
51
35
|
end
|
52
36
|
|
53
|
-
it '
|
37
|
+
it 'handles array of uids' do
|
54
38
|
response = Entrez.ESearch('gene', {UID: [1, 2, 3]})
|
55
39
|
response.ids.should =~ [1, 2, 3]
|
56
40
|
end
|
@@ -58,8 +42,10 @@ describe Entrez do
|
|
58
42
|
end
|
59
43
|
|
60
44
|
it 'should respect query limit' do
|
61
|
-
|
62
|
-
|
45
|
+
fake_service :ESearch, 'esearch_empty.xml' do
|
46
|
+
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
47
|
+
requests.should take_longer_than(1.0)
|
48
|
+
end
|
63
49
|
end
|
64
50
|
|
65
51
|
it 'should convert search term hash into query string with AND operator by default' do
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe HTTParty::ResponseIds do
|
4
|
+
|
5
|
+
it 'parses body and returns IDs' do
|
6
|
+
fake_service :ESearch, 'esearch_1_2_3.xml' do
|
7
|
+
response = Entrez.ESearch('asdf')
|
8
|
+
response.ids.should == [1, 2, 3]
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'returns empty array if nothing found' do
|
13
|
+
fake_service :ESearch, 'esearch_empty.xml' do
|
14
|
+
response = Entrez.ESearch('asdf')
|
15
|
+
response.ids.should be_empty
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'returns array even if only 1 id found' do
|
20
|
+
fake_service :ESearch, 'esearch_1.xml' do
|
21
|
+
response = Entrez.ESearch('asdf')
|
22
|
+
response.ids.should == [1]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
require 'awesome_print'
|
2
|
+
require 'fakeweb'
|
1
3
|
require 'pathname'
|
2
4
|
|
3
5
|
require File.join(Pathname(__FILE__).dirname.expand_path, '../lib/entrez')
|
4
|
-
|
5
|
-
|
6
|
+
Entrez.default_params(
|
7
|
+
retmode: :xml,
|
8
|
+
)
|
6
9
|
Dir[File.expand_path("../support/**/*.rb", __FILE__)].each {|f| require f}
|
7
|
-
|
8
10
|
RSpec.configure do |config|
|
9
11
|
config.include(Macros)
|
10
12
|
end
|
@@ -0,0 +1,5 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
|
3
|
+
<eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
|
4
|
+
<Id>1</Id>
|
5
|
+
</IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
|
@@ -0,0 +1,7 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
|
3
|
+
<eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
|
4
|
+
<Id>1</Id>
|
5
|
+
<Id>2</Id>
|
6
|
+
<Id>3</Id>
|
7
|
+
</IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
|
@@ -0,0 +1,4 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
|
3
|
+
<eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
|
4
|
+
</IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
|
data/spec/support/macros.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Macros
|
2
2
|
|
3
|
-
def
|
4
|
-
File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name))
|
3
|
+
def fixture_file(file_name)
|
4
|
+
File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name))
|
5
5
|
end
|
6
6
|
|
7
7
|
# Return how long it takes to run block.
|
@@ -12,4 +12,16 @@ module Macros
|
|
12
12
|
end_time - start_time
|
13
13
|
end
|
14
14
|
|
15
|
+
# Use FakeWeb to simulate Entrez service with contents of fixture file.
|
16
|
+
# Since the generated URL is a bit difficult to capture,
|
17
|
+
# Faked uri will just match regular expression of service.
|
18
|
+
# When block ends, clean registry.
|
19
|
+
def fake_service(service, fixture_file_name)
|
20
|
+
file_contents = fixture_file(fixture_file_name).read
|
21
|
+
FakeWeb.register_uri(:get, Regexp.new(service.to_s.downcase), body: file_contents, content_type: 'text/xml')
|
22
|
+
yield
|
23
|
+
ensure
|
24
|
+
FakeWeb.clean_registry
|
25
|
+
end
|
26
|
+
|
15
27
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: entrez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.5.
|
5
|
+
version: 0.5.7
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jared Ning
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-08-15 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -25,16 +25,38 @@ dependencies:
|
|
25
25
|
type: :runtime
|
26
26
|
version_requirements: *id001
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: awesome_print
|
29
29
|
prerelease: false
|
30
30
|
requirement: &id002 !ruby/object:Gem::Requirement
|
31
31
|
none: false
|
32
32
|
requirements:
|
33
|
-
- - "
|
33
|
+
- - ">="
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
35
|
+
version: "0"
|
36
36
|
type: :development
|
37
37
|
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: fakeweb
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - "="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.3.0
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id003
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rspec
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - "="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 2.6.0
|
58
|
+
type: :development
|
59
|
+
version_requirements: *id004
|
38
60
|
description: Simple API for HTTP requests to Entrez E-utilities
|
39
61
|
email:
|
40
62
|
- jared@redningja.com
|
@@ -47,6 +69,7 @@ extra_rdoc_files: []
|
|
47
69
|
files:
|
48
70
|
- .gemtest
|
49
71
|
- .gitignore
|
72
|
+
- .rspec
|
50
73
|
- .rvmrc
|
51
74
|
- Gemfile
|
52
75
|
- README.rdoc
|
@@ -54,10 +77,16 @@ files:
|
|
54
77
|
- entrez.gemspec
|
55
78
|
- lib/entrez.rb
|
56
79
|
- lib/entrez/version.rb
|
80
|
+
- lib/httparty/response/ids.rb
|
81
|
+
- lib/httparty/response_ext.rb
|
57
82
|
- lib/query_string_normalizer.rb
|
58
83
|
- spec/entrez_spec.rb
|
59
84
|
- spec/query_string_normalizer_spec.rb
|
85
|
+
- spec/response_ids_spec.rb
|
60
86
|
- spec/spec_helper.rb
|
87
|
+
- spec/support/fixtures/esearch_1.xml
|
88
|
+
- spec/support/fixtures/esearch_1_2_3.xml
|
89
|
+
- spec/support/fixtures/esearch_empty.xml
|
61
90
|
- spec/support/macros.rb
|
62
91
|
- spec/support/matchers.rb
|
63
92
|
has_rdoc: true
|
@@ -91,6 +120,10 @@ summary: HTTP requests to Entrez E-utilities
|
|
91
120
|
test_files:
|
92
121
|
- spec/entrez_spec.rb
|
93
122
|
- spec/query_string_normalizer_spec.rb
|
123
|
+
- spec/response_ids_spec.rb
|
94
124
|
- spec/spec_helper.rb
|
125
|
+
- spec/support/fixtures/esearch_1.xml
|
126
|
+
- spec/support/fixtures/esearch_1_2_3.xml
|
127
|
+
- spec/support/fixtures/esearch_empty.xml
|
95
128
|
- spec/support/macros.rb
|
96
129
|
- spec/support/matchers.rb
|