entrez 0.5.6 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +1 -0
- data/README.rdoc +6 -1
- data/entrez.gemspec +5 -2
- data/lib/entrez.rb +9 -23
- data/lib/entrez/version.rb +1 -1
- data/lib/httparty/response/ids.rb +32 -0
- data/lib/httparty/response_ext.rb +22 -0
- data/spec/entrez_spec.rb +9 -23
- data/spec/response_ids_spec.rb +26 -0
- data/spec/spec_helper.rb +5 -3
- data/spec/support/fixtures/esearch_1.xml +5 -0
- data/spec/support/fixtures/esearch_1_2_3.xml +7 -0
- data/spec/support/fixtures/esearch_empty.xml +4 -0
- data/spec/support/macros.rb +14 -2
- metadata +38 -5
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/README.rdoc
CHANGED
@@ -54,9 +54,14 @@ args:
|
|
54
54
|
|
55
55
|
Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
56
56
|
#=> makes request to http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=genomeprj&term=hapmap[WORD]+AND+inprogress[SEQS]&retmode=xml.
|
57
|
-
#=> returns XML document with list of
|
57
|
+
#=> returns XML document with list of ids of genome projects that match the search term criteria.
|
58
58
|
#=> i.e. genome projects that have 'hapmap' in the description and whose sequencing status is 'inprogress'.
|
59
59
|
|
60
|
+
The response has a convenience method to retrieve the parsed ids.
|
61
|
+
|
62
|
+
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
63
|
+
response.ids #=> [1, 2, ...]
|
64
|
+
|
60
65
|
==== Customized Queries
|
61
66
|
|
62
67
|
You can build your own customized queries if you have something more complex with ANDs and ORs.
|
data/entrez.gemspec
CHANGED
@@ -19,7 +19,10 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.add_runtime_dependency 'httparty' # Written with version 0.7.
|
23
|
-
|
22
|
+
s.add_runtime_dependency 'httparty' # Written with version 0.7.8.
|
23
|
+
|
24
|
+
s.add_development_dependency 'awesome_print'
|
25
|
+
s.add_development_dependency 'fakeweb', '1.3.0'
|
26
|
+
s.add_development_dependency 'rspec', '2.6.0'
|
24
27
|
|
25
28
|
end
|
data/lib/entrez.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'httparty'
|
2
|
+
require 'httparty/response_ext'
|
2
3
|
require 'query_string_normalizer'
|
3
4
|
|
4
5
|
class Entrez
|
@@ -21,12 +22,10 @@ class Entrez
|
|
21
22
|
end
|
22
23
|
|
23
24
|
# E.g. Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
24
|
-
# returns response. For convenience, response.ids() returns array of ID integers from result set.
|
25
25
|
# search_terms can also be string literal.
|
26
26
|
def ESearch(db, search_terms = {}, params = {})
|
27
27
|
params[:term] = search_terms.is_a?(Hash) ? convert_search_term_hash(search_terms) : search_terms
|
28
28
|
response = perform '/esearch.fcgi', db, params
|
29
|
-
parse_ids_and_extend response if response[:retmode].nil? || response[:retmode] == :xml
|
30
29
|
response
|
31
30
|
end
|
32
31
|
|
@@ -58,15 +57,18 @@ class Entrez
|
|
58
57
|
|
59
58
|
private
|
60
59
|
|
60
|
+
# NCBI does not allow more than 3 requests per second.
|
61
|
+
# Unless 3 requests ago was more than 1 second ago,
|
62
|
+
# sleep for enough time to honor limit.
|
61
63
|
def respect_query_limit
|
62
64
|
three_requests_ago = request_times[-3]
|
63
65
|
return unless three_requests_ago
|
64
|
-
|
65
|
-
|
66
|
-
enough_time_has_passed = now > three_requests_ago + 1
|
66
|
+
time_for_last_3_requeests = Time.now.to_f - three_requests_ago
|
67
|
+
enough_time_has_passed = time_for_last_3_requeests >= 1
|
67
68
|
unless enough_time_has_passed
|
68
|
-
|
69
|
-
|
69
|
+
sleep_time = 1 - time_for_last_3_requeests
|
70
|
+
STDERR.puts "sleeping #{sleep_time}"
|
71
|
+
sleep(sleep_time)
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
@@ -74,22 +76,6 @@ class Entrez
|
|
74
76
|
@request_times ||= []
|
75
77
|
end
|
76
78
|
|
77
|
-
# Define ids() method which will parse and return the IDs from the XML response.
|
78
|
-
def parse_ids_and_extend(response)
|
79
|
-
response.instance_eval do
|
80
|
-
def ids
|
81
|
-
return @ids if @ids
|
82
|
-
id_content = self['eSearchResult']['IdList']['Id']
|
83
|
-
# If there is only 1, Crack will parse it and return just the string.
|
84
|
-
# Need to always return array.
|
85
|
-
id_content = [id_content].flatten
|
86
|
-
@ids = id_content.map(&:to_i)
|
87
|
-
rescue ::NoMethodError
|
88
|
-
@ids = []
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
79
|
end
|
94
80
|
|
95
81
|
class UnknownOperator < StandardError
|
data/lib/entrez/version.rb
CHANGED
@@ -0,0 +1,32 @@
|
|
1
|
+
module HTTParty
|
2
|
+
|
3
|
+
module ResponseIds
|
4
|
+
|
5
|
+
# For ESearch, add convenience method that parses ids and converts to array of integers.
|
6
|
+
# Only works if either no return mode is specified or if it is :xml.
|
7
|
+
def ids
|
8
|
+
if parse_ids?
|
9
|
+
return @ids if @ids
|
10
|
+
id_list = parsed_response['eSearchResult']['IdList']
|
11
|
+
if id_list
|
12
|
+
id_content = id_list['Id']
|
13
|
+
id_content = [id_content].flatten
|
14
|
+
@ids = id_content.map(&:to_i)
|
15
|
+
else
|
16
|
+
@ids = []
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
# Parse only if this is an ESearch request and in xml format.
|
24
|
+
def parse_ids?
|
25
|
+
esearch_request? && (retmode.nil? || xml?)
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
HTTParty::Response.send :include, HTTParty::ResponseIds
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'httparty/response/ids'
|
2
|
+
|
3
|
+
module HTTParty
|
4
|
+
module ResponseExt
|
5
|
+
|
6
|
+
# Get the return mode from request.
|
7
|
+
def retmode
|
8
|
+
request.options[:query][:retmode]
|
9
|
+
end
|
10
|
+
|
11
|
+
def xml?
|
12
|
+
retmode == :xml
|
13
|
+
end
|
14
|
+
|
15
|
+
def esearch_request?
|
16
|
+
request.path.to_s.match /esearch/
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
HTTParty::Response.send :include, HTTParty::ResponseExt
|
data/spec/entrez_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Entrez do
|
|
8
8
|
end
|
9
9
|
|
10
10
|
it '#EFetch retrieves results' do
|
11
|
-
response = Entrez.EFetch('taxonomy', id: 9606
|
11
|
+
response = Entrez.EFetch('taxonomy', id: 9606)
|
12
12
|
response.body.should include('Homo sapiens')
|
13
13
|
end
|
14
14
|
|
@@ -18,39 +18,23 @@ describe Entrez do
|
|
18
18
|
end
|
19
19
|
|
20
20
|
it '#EInfo retrieves results' do
|
21
|
-
response = Entrez.EInfo('snp'
|
21
|
+
response = Entrez.EInfo('snp')
|
22
22
|
response.body.should include('<Name>RS</Name>')
|
23
23
|
end
|
24
24
|
|
25
25
|
context '#ESearch' do
|
26
26
|
|
27
27
|
it 'retrieves results' do
|
28
|
-
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}
|
28
|
+
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'})
|
29
29
|
response.body.should include('28911')
|
30
30
|
end
|
31
31
|
|
32
|
-
it 'response returns IDs for convenience' do
|
33
|
-
response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
|
34
|
-
response.ids.should == [60153, 29429, 28911, 48101, 59851, 59849, 59847, 59845, 59839, 59835, 59833, 59831, 51895, 59829, 59827, 60835, 59811, 60831, 60819, 33895]
|
35
|
-
end
|
36
|
-
|
37
|
-
it 'returns empty array if nothing found' do
|
38
|
-
response = Entrez.ESearch('genomeprj', {NON_EXISTENT_SEARCH_FIELD: 'does not exist even in oompaloompa land'}, retmode: :xml)
|
39
|
-
response.ids.should be_empty
|
40
|
-
end
|
41
|
-
|
42
|
-
it 'returns array even if only 1 id found' do
|
43
|
-
id = 60153
|
44
|
-
response = Entrez.ESearch('genomeprj', {uid: id}, retmode: :xml)
|
45
|
-
response.ids.should == [id]
|
46
|
-
end
|
47
|
-
|
48
32
|
it 'accepts string as search_terms parameter' do
|
49
|
-
response = Entrez.ESearch('genomeprj', 'hapmap[WORD]'
|
33
|
+
response = Entrez.ESearch('genomeprj', 'hapmap[WORD]')
|
50
34
|
response.ids.should include(60153)
|
51
35
|
end
|
52
36
|
|
53
|
-
it '
|
37
|
+
it 'handles array of uids' do
|
54
38
|
response = Entrez.ESearch('gene', {UID: [1, 2, 3]})
|
55
39
|
response.ids.should =~ [1, 2, 3]
|
56
40
|
end
|
@@ -58,8 +42,10 @@ describe Entrez do
|
|
58
42
|
end
|
59
43
|
|
60
44
|
it 'should respect query limit' do
|
61
|
-
|
62
|
-
|
45
|
+
fake_service :ESearch, 'esearch_empty.xml' do
|
46
|
+
requests = proc { 4.times { Entrez.ESearch('asdf') } }
|
47
|
+
requests.should take_longer_than(1.0)
|
48
|
+
end
|
63
49
|
end
|
64
50
|
|
65
51
|
it 'should convert search term hash into query string with AND operator by default' do
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe HTTParty::ResponseIds do
|
4
|
+
|
5
|
+
it 'parses body and returns IDs' do
|
6
|
+
fake_service :ESearch, 'esearch_1_2_3.xml' do
|
7
|
+
response = Entrez.ESearch('asdf')
|
8
|
+
response.ids.should == [1, 2, 3]
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'returns empty array if nothing found' do
|
13
|
+
fake_service :ESearch, 'esearch_empty.xml' do
|
14
|
+
response = Entrez.ESearch('asdf')
|
15
|
+
response.ids.should be_empty
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'returns array even if only 1 id found' do
|
20
|
+
fake_service :ESearch, 'esearch_1.xml' do
|
21
|
+
response = Entrez.ESearch('asdf')
|
22
|
+
response.ids.should == [1]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
require 'awesome_print'
|
2
|
+
require 'fakeweb'
|
1
3
|
require 'pathname'
|
2
4
|
|
3
5
|
require File.join(Pathname(__FILE__).dirname.expand_path, '../lib/entrez')
|
4
|
-
|
5
|
-
|
6
|
+
Entrez.default_params(
|
7
|
+
retmode: :xml,
|
8
|
+
)
|
6
9
|
Dir[File.expand_path("../support/**/*.rb", __FILE__)].each {|f| require f}
|
7
|
-
|
8
10
|
RSpec.configure do |config|
|
9
11
|
config.include(Macros)
|
10
12
|
end
|
@@ -0,0 +1,5 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
|
3
|
+
<eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
|
4
|
+
<Id>1</Id>
|
5
|
+
</IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
|
@@ -0,0 +1,7 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
|
3
|
+
<eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
|
4
|
+
<Id>1</Id>
|
5
|
+
<Id>2</Id>
|
6
|
+
<Id>3</Id>
|
7
|
+
</IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
|
@@ -0,0 +1,4 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
|
3
|
+
<eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
|
4
|
+
</IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
|
data/spec/support/macros.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Macros
|
2
2
|
|
3
|
-
def
|
4
|
-
File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name))
|
3
|
+
def fixture_file(file_name)
|
4
|
+
File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name))
|
5
5
|
end
|
6
6
|
|
7
7
|
# Return how long it takes to run block.
|
@@ -12,4 +12,16 @@ module Macros
|
|
12
12
|
end_time - start_time
|
13
13
|
end
|
14
14
|
|
15
|
+
# Use FakeWeb to simulate Entrez service with contents of fixture file.
|
16
|
+
# Since the generated URL is a bit difficult to capture,
|
17
|
+
# Faked uri will just match regular expression of service.
|
18
|
+
# When block ends, clean registry.
|
19
|
+
def fake_service(service, fixture_file_name)
|
20
|
+
file_contents = fixture_file(fixture_file_name).read
|
21
|
+
FakeWeb.register_uri(:get, Regexp.new(service.to_s.downcase), body: file_contents, content_type: 'text/xml')
|
22
|
+
yield
|
23
|
+
ensure
|
24
|
+
FakeWeb.clean_registry
|
25
|
+
end
|
26
|
+
|
15
27
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: entrez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.5.
|
5
|
+
version: 0.5.7
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jared Ning
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-08-15 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -25,16 +25,38 @@ dependencies:
|
|
25
25
|
type: :runtime
|
26
26
|
version_requirements: *id001
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: awesome_print
|
29
29
|
prerelease: false
|
30
30
|
requirement: &id002 !ruby/object:Gem::Requirement
|
31
31
|
none: false
|
32
32
|
requirements:
|
33
|
-
- - "
|
33
|
+
- - ">="
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version:
|
35
|
+
version: "0"
|
36
36
|
type: :development
|
37
37
|
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: fakeweb
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - "="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 1.3.0
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id003
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rspec
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - "="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 2.6.0
|
58
|
+
type: :development
|
59
|
+
version_requirements: *id004
|
38
60
|
description: Simple API for HTTP requests to Entrez E-utilities
|
39
61
|
email:
|
40
62
|
- jared@redningja.com
|
@@ -47,6 +69,7 @@ extra_rdoc_files: []
|
|
47
69
|
files:
|
48
70
|
- .gemtest
|
49
71
|
- .gitignore
|
72
|
+
- .rspec
|
50
73
|
- .rvmrc
|
51
74
|
- Gemfile
|
52
75
|
- README.rdoc
|
@@ -54,10 +77,16 @@ files:
|
|
54
77
|
- entrez.gemspec
|
55
78
|
- lib/entrez.rb
|
56
79
|
- lib/entrez/version.rb
|
80
|
+
- lib/httparty/response/ids.rb
|
81
|
+
- lib/httparty/response_ext.rb
|
57
82
|
- lib/query_string_normalizer.rb
|
58
83
|
- spec/entrez_spec.rb
|
59
84
|
- spec/query_string_normalizer_spec.rb
|
85
|
+
- spec/response_ids_spec.rb
|
60
86
|
- spec/spec_helper.rb
|
87
|
+
- spec/support/fixtures/esearch_1.xml
|
88
|
+
- spec/support/fixtures/esearch_1_2_3.xml
|
89
|
+
- spec/support/fixtures/esearch_empty.xml
|
61
90
|
- spec/support/macros.rb
|
62
91
|
- spec/support/matchers.rb
|
63
92
|
has_rdoc: true
|
@@ -91,6 +120,10 @@ summary: HTTP requests to Entrez E-utilities
|
|
91
120
|
test_files:
|
92
121
|
- spec/entrez_spec.rb
|
93
122
|
- spec/query_string_normalizer_spec.rb
|
123
|
+
- spec/response_ids_spec.rb
|
94
124
|
- spec/spec_helper.rb
|
125
|
+
- spec/support/fixtures/esearch_1.xml
|
126
|
+
- spec/support/fixtures/esearch_1_2_3.xml
|
127
|
+
- spec/support/fixtures/esearch_empty.xml
|
95
128
|
- spec/support/macros.rb
|
96
129
|
- spec/support/matchers.rb
|