entrez 0.5.6 → 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --colour
@@ -54,9 +54,14 @@ args:
54
54
 
55
55
  Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
56
56
  #=> makes request to http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=genomeprj&term=hapmap[WORD]+AND+inprogress[SEQS]&retmode=xml.
57
- #=> returns XML document with list of Ids of genome projects that match the searc term criteria.
57
+ #=> returns XML document with list of ids of genome projects that match the search term criteria.
58
58
  #=> i.e. genome projects that have 'hapmap' in the description and whose sequencing status is 'inprogress'.
59
59
 
60
+ The response has a convenience method to retrieve the parsed ids.
61
+
62
+ response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
63
+ response.ids #=> [1, 2, ...]
64
+
60
65
  ==== Customized Queries
61
66
 
62
67
  You can build your own customized queries if you have something more complex with ANDs and ORs.
@@ -19,7 +19,10 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_runtime_dependency 'httparty' # Written with version 0.7.3.
23
- s.add_development_dependency 'rspec', '2.4.0'
22
+ s.add_runtime_dependency 'httparty' # Written with version 0.7.8.
23
+
24
+ s.add_development_dependency 'awesome_print'
25
+ s.add_development_dependency 'fakeweb', '1.3.0'
26
+ s.add_development_dependency 'rspec', '2.6.0'
24
27
 
25
28
  end
@@ -1,4 +1,5 @@
1
1
  require 'httparty'
2
+ require 'httparty/response_ext'
2
3
  require 'query_string_normalizer'
3
4
 
4
5
  class Entrez
@@ -21,12 +22,10 @@ class Entrez
21
22
  end
22
23
 
23
24
  # E.g. Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
24
- # returns response. For convenience, response.ids() returns array of ID integers from result set.
25
25
  # search_terms can also be string literal.
26
26
  def ESearch(db, search_terms = {}, params = {})
27
27
  params[:term] = search_terms.is_a?(Hash) ? convert_search_term_hash(search_terms) : search_terms
28
28
  response = perform '/esearch.fcgi', db, params
29
- parse_ids_and_extend response if response[:retmode].nil? || response[:retmode] == :xml
30
29
  response
31
30
  end
32
31
 
@@ -58,15 +57,18 @@ class Entrez
58
57
 
59
58
  private
60
59
 
60
+ # NCBI does not allow more than 3 requests per second.
61
+ # Unless 3 requests ago was more than 1 second ago,
62
+ # sleep for enough time to honor limit.
61
63
  def respect_query_limit
62
64
  three_requests_ago = request_times[-3]
63
65
  return unless three_requests_ago
64
- three_requests_ago = three_requests_ago.to_f
65
- now = Time.now.to_f
66
- enough_time_has_passed = now > three_requests_ago + 1
66
+ time_for_last_3_requeests = Time.now.to_f - three_requests_ago
67
+ enough_time_has_passed = time_for_last_3_requeests >= 1
67
68
  unless enough_time_has_passed
68
- STDERR.puts "sleeping #{now - three_requests_ago}"
69
- sleep(now - three_requests_ago)
69
+ sleep_time = 1 - time_for_last_3_requeests
70
+ STDERR.puts "sleeping #{sleep_time}"
71
+ sleep(sleep_time)
70
72
  end
71
73
  end
72
74
 
@@ -74,22 +76,6 @@ class Entrez
74
76
  @request_times ||= []
75
77
  end
76
78
 
77
- # Define ids() method which will parse and return the IDs from the XML response.
78
- def parse_ids_and_extend(response)
79
- response.instance_eval do
80
- def ids
81
- return @ids if @ids
82
- id_content = self['eSearchResult']['IdList']['Id']
83
- # If there is only 1, Crack will parse it and return just the string.
84
- # Need to always return array.
85
- id_content = [id_content].flatten
86
- @ids = id_content.map(&:to_i)
87
- rescue ::NoMethodError
88
- @ids = []
89
- end
90
- end
91
- end
92
-
93
79
  end
94
80
 
95
81
  class UnknownOperator < StandardError
@@ -1,3 +1,3 @@
1
1
  module Entrez
2
- VERSION = "0.5.6"
2
+ VERSION = "0.5.7"
3
3
  end
@@ -0,0 +1,32 @@
1
+ module HTTParty
2
+
3
+ module ResponseIds
4
+
5
+ # For ESearch, add convenience method that parses ids and converts to array of integers.
6
+ # Only works if either no return mode is specified or if it is :xml.
7
+ def ids
8
+ if parse_ids?
9
+ return @ids if @ids
10
+ id_list = parsed_response['eSearchResult']['IdList']
11
+ if id_list
12
+ id_content = id_list['Id']
13
+ id_content = [id_content].flatten
14
+ @ids = id_content.map(&:to_i)
15
+ else
16
+ @ids = []
17
+ end
18
+ end
19
+ end
20
+
21
+ private
22
+
23
+ # Parse only if this is an ESearch request and in xml format.
24
+ def parse_ids?
25
+ esearch_request? && (retmode.nil? || xml?)
26
+ end
27
+
28
+ end
29
+
30
+ end
31
+
32
+ HTTParty::Response.send :include, HTTParty::ResponseIds
@@ -0,0 +1,22 @@
1
+ require 'httparty/response/ids'
2
+
3
+ module HTTParty
4
+ module ResponseExt
5
+
6
+ # Get the return mode from request.
7
+ def retmode
8
+ request.options[:query][:retmode]
9
+ end
10
+
11
+ def xml?
12
+ retmode == :xml
13
+ end
14
+
15
+ def esearch_request?
16
+ request.path.to_s.match /esearch/
17
+ end
18
+
19
+ end
20
+ end
21
+
22
+ HTTParty::Response.send :include, HTTParty::ResponseExt
@@ -8,7 +8,7 @@ describe Entrez do
8
8
  end
9
9
 
10
10
  it '#EFetch retrieves results' do
11
- response = Entrez.EFetch('taxonomy', id: 9606, retmode: :xml)
11
+ response = Entrez.EFetch('taxonomy', id: 9606)
12
12
  response.body.should include('Homo sapiens')
13
13
  end
14
14
 
@@ -18,39 +18,23 @@ describe Entrez do
18
18
  end
19
19
 
20
20
  it '#EInfo retrieves results' do
21
- response = Entrez.EInfo('snp', retmode: :xml)
21
+ response = Entrez.EInfo('snp')
22
22
  response.body.should include('<Name>RS</Name>')
23
23
  end
24
24
 
25
25
  context '#ESearch' do
26
26
 
27
27
  it 'retrieves results' do
28
- response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
28
+ response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'})
29
29
  response.body.should include('28911')
30
30
  end
31
31
 
32
- it 'response returns IDs for convenience' do
33
- response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
34
- response.ids.should == [60153, 29429, 28911, 48101, 59851, 59849, 59847, 59845, 59839, 59835, 59833, 59831, 51895, 59829, 59827, 60835, 59811, 60831, 60819, 33895]
35
- end
36
-
37
- it 'returns empty array if nothing found' do
38
- response = Entrez.ESearch('genomeprj', {NON_EXISTENT_SEARCH_FIELD: 'does not exist even in oompaloompa land'}, retmode: :xml)
39
- response.ids.should be_empty
40
- end
41
-
42
- it 'returns array even if only 1 id found' do
43
- id = 60153
44
- response = Entrez.ESearch('genomeprj', {uid: id}, retmode: :xml)
45
- response.ids.should == [id]
46
- end
47
-
48
32
  it 'accepts string as search_terms parameter' do
49
- response = Entrez.ESearch('genomeprj', 'hapmap[WORD]', retmode: :xml)
33
+ response = Entrez.ESearch('genomeprj', 'hapmap[WORD]')
50
34
  response.ids.should include(60153)
51
35
  end
52
36
 
53
- it 'can handle array of uids' do
37
+ it 'handles array of uids' do
54
38
  response = Entrez.ESearch('gene', {UID: [1, 2, 3]})
55
39
  response.ids.should =~ [1, 2, 3]
56
40
  end
@@ -58,8 +42,10 @@ describe Entrez do
58
42
  end
59
43
 
60
44
  it 'should respect query limit' do
61
- requests = proc { 4.times { Entrez.EFetch('taxonomy', id: 9606) } }
62
- requests.should take_longer_than(1.0)
45
+ fake_service :ESearch, 'esearch_empty.xml' do
46
+ requests = proc { 4.times { Entrez.ESearch('asdf') } }
47
+ requests.should take_longer_than(1.0)
48
+ end
63
49
  end
64
50
 
65
51
  it 'should convert search term hash into query string with AND operator by default' do
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
+ describe HTTParty::ResponseIds do
4
+
5
+ it 'parses body and returns IDs' do
6
+ fake_service :ESearch, 'esearch_1_2_3.xml' do
7
+ response = Entrez.ESearch('asdf')
8
+ response.ids.should == [1, 2, 3]
9
+ end
10
+ end
11
+
12
+ it 'returns empty array if nothing found' do
13
+ fake_service :ESearch, 'esearch_empty.xml' do
14
+ response = Entrez.ESearch('asdf')
15
+ response.ids.should be_empty
16
+ end
17
+ end
18
+
19
+ it 'returns array even if only 1 id found' do
20
+ fake_service :ESearch, 'esearch_1.xml' do
21
+ response = Entrez.ESearch('asdf')
22
+ response.ids.should == [1]
23
+ end
24
+ end
25
+
26
+ end
@@ -1,10 +1,12 @@
1
+ require 'awesome_print'
2
+ require 'fakeweb'
1
3
  require 'pathname'
2
4
 
3
5
  require File.join(Pathname(__FILE__).dirname.expand_path, '../lib/entrez')
4
-
5
- # require support .rb files.
6
+ Entrez.default_params(
7
+ retmode: :xml,
8
+ )
6
9
  Dir[File.expand_path("../support/**/*.rb", __FILE__)].each {|f| require f}
7
-
8
10
  RSpec.configure do |config|
9
11
  config.include(Macros)
10
12
  end
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" ?>
2
+ <!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
3
+ <eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
4
+ <Id>1</Id>
5
+ </IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" ?>
2
+ <!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
3
+ <eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
4
+ <Id>1</Id>
5
+ <Id>2</Id>
6
+ <Id>3</Id>
7
+ </IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" ?>
2
+ <!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
3
+ <eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
4
+ </IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
@@ -1,7 +1,7 @@
1
1
  module Macros
2
2
 
3
- def file_fixture(file_name)
4
- File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name)).read
3
+ def fixture_file(file_name)
4
+ File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name))
5
5
  end
6
6
 
7
7
  # Return how long it takes to run block.
@@ -12,4 +12,16 @@ module Macros
12
12
  end_time - start_time
13
13
  end
14
14
 
15
+ # Use FakeWeb to simulate Entrez service with contents of fixture file.
16
+ # Since the generated URL is a bit difficult to capture,
17
+ # the faked URI just matches a regular expression built from the service name.
18
+ # When block ends, clean registry.
19
+ def fake_service(service, fixture_file_name)
20
+ file_contents = fixture_file(fixture_file_name).read
21
+ FakeWeb.register_uri(:get, Regexp.new(service.to_s.downcase), body: file_contents, content_type: 'text/xml')
22
+ yield
23
+ ensure
24
+ FakeWeb.clean_registry
25
+ end
26
+
15
27
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: entrez
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.5.6
5
+ version: 0.5.7
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jared Ning
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-07-20 00:00:00 -05:00
13
+ date: 2011-08-15 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -25,16 +25,38 @@ dependencies:
25
25
  type: :runtime
26
26
  version_requirements: *id001
27
27
  - !ruby/object:Gem::Dependency
28
- name: rspec
28
+ name: awesome_print
29
29
  prerelease: false
30
30
  requirement: &id002 !ruby/object:Gem::Requirement
31
31
  none: false
32
32
  requirements:
33
- - - "="
33
+ - - ">="
34
34
  - !ruby/object:Gem::Version
35
- version: 2.4.0
35
+ version: "0"
36
36
  type: :development
37
37
  version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: fakeweb
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - "="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.3.0
47
+ type: :development
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: rspec
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - "="
56
+ - !ruby/object:Gem::Version
57
+ version: 2.6.0
58
+ type: :development
59
+ version_requirements: *id004
38
60
  description: Simple API for HTTP requests to Entrez E-utilities
39
61
  email:
40
62
  - jared@redningja.com
@@ -47,6 +69,7 @@ extra_rdoc_files: []
47
69
  files:
48
70
  - .gemtest
49
71
  - .gitignore
72
+ - .rspec
50
73
  - .rvmrc
51
74
  - Gemfile
52
75
  - README.rdoc
@@ -54,10 +77,16 @@ files:
54
77
  - entrez.gemspec
55
78
  - lib/entrez.rb
56
79
  - lib/entrez/version.rb
80
+ - lib/httparty/response/ids.rb
81
+ - lib/httparty/response_ext.rb
57
82
  - lib/query_string_normalizer.rb
58
83
  - spec/entrez_spec.rb
59
84
  - spec/query_string_normalizer_spec.rb
85
+ - spec/response_ids_spec.rb
60
86
  - spec/spec_helper.rb
87
+ - spec/support/fixtures/esearch_1.xml
88
+ - spec/support/fixtures/esearch_1_2_3.xml
89
+ - spec/support/fixtures/esearch_empty.xml
61
90
  - spec/support/macros.rb
62
91
  - spec/support/matchers.rb
63
92
  has_rdoc: true
@@ -91,6 +120,10 @@ summary: HTTP requests to Entrez E-utilities
91
120
  test_files:
92
121
  - spec/entrez_spec.rb
93
122
  - spec/query_string_normalizer_spec.rb
123
+ - spec/response_ids_spec.rb
94
124
  - spec/spec_helper.rb
125
+ - spec/support/fixtures/esearch_1.xml
126
+ - spec/support/fixtures/esearch_1_2_3.xml
127
+ - spec/support/fixtures/esearch_empty.xml
95
128
  - spec/support/macros.rb
96
129
  - spec/support/matchers.rb