entrez 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --colour
@@ -54,9 +54,14 @@ args:
54
54
 
55
55
  Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
56
56
  #=> makes request to http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=genomeprj&term=hapmap[WORD]+AND+inprogress[SEQS]&retmode=xml.
57
- #=> returns XML document with list of Ids of genome projects that match the searc term criteria.
57
+ #=> returns XML document with list of ids of genome projects that match the search term criteria.
58
58
  #=> i.e. genome projects that have 'hapmap' in the description and whose sequencing status is 'inprogress'.
59
59
 
60
+ The response has a convenience method to retrieve the parsed ids.
61
+
62
+ response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
63
+ response.ids #=> [1, 2, ...]
64
+
60
65
  ==== Customized Queries
61
66
 
62
67
  You can build your own customized queries if you have something more complex with ANDs and ORs.
@@ -19,7 +19,10 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_runtime_dependency 'httparty' # Written with version 0.7.3.
23
- s.add_development_dependency 'rspec', '2.4.0'
22
+ s.add_runtime_dependency 'httparty' # Written with version 0.7.8.
23
+
24
+ s.add_development_dependency 'awesome_print'
25
+ s.add_development_dependency 'fakeweb', '1.3.0'
26
+ s.add_development_dependency 'rspec', '2.6.0'
24
27
 
25
28
  end
@@ -1,4 +1,5 @@
1
1
  require 'httparty'
2
+ require 'httparty/response_ext'
2
3
  require 'query_string_normalizer'
3
4
 
4
5
  class Entrez
@@ -21,12 +22,10 @@ class Entrez
21
22
  end
22
23
 
23
24
  # E.g. Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
24
- # returns response. For convenience, response.ids() returns array of ID integers from result set.
25
25
  # search_terms can also be string literal.
26
26
  def ESearch(db, search_terms = {}, params = {})
27
27
  params[:term] = search_terms.is_a?(Hash) ? convert_search_term_hash(search_terms) : search_terms
28
28
  response = perform '/esearch.fcgi', db, params
29
- parse_ids_and_extend response if response[:retmode].nil? || response[:retmode] == :xml
30
29
  response
31
30
  end
32
31
 
@@ -58,15 +57,18 @@ class Entrez
58
57
 
59
58
  private
60
59
 
60
+ # NCBI does not allow more than 3 requests per second.
61
+ # If the third-most-recent request was made less than 1 second ago,
62
+ # sleep for enough time to honor limit.
61
63
  def respect_query_limit
62
64
  three_requests_ago = request_times[-3]
63
65
  return unless three_requests_ago
64
- three_requests_ago = three_requests_ago.to_f
65
- now = Time.now.to_f
66
- enough_time_has_passed = now > three_requests_ago + 1
66
+ time_for_last_3_requeests = Time.now.to_f - three_requests_ago
67
+ enough_time_has_passed = time_for_last_3_requeests >= 1
67
68
  unless enough_time_has_passed
68
- STDERR.puts "sleeping #{now - three_requests_ago}"
69
- sleep(now - three_requests_ago)
69
+ sleep_time = 1 - time_for_last_3_requeests
70
+ STDERR.puts "sleeping #{sleep_time}"
71
+ sleep(sleep_time)
70
72
  end
71
73
  end
72
74
 
@@ -74,22 +76,6 @@ class Entrez
74
76
  @request_times ||= []
75
77
  end
76
78
 
77
- # Define ids() method which will parse and return the IDs from the XML response.
78
- def parse_ids_and_extend(response)
79
- response.instance_eval do
80
- def ids
81
- return @ids if @ids
82
- id_content = self['eSearchResult']['IdList']['Id']
83
- # If there is only 1, Crack will parse it and return just the string.
84
- # Need to always return array.
85
- id_content = [id_content].flatten
86
- @ids = id_content.map(&:to_i)
87
- rescue ::NoMethodError
88
- @ids = []
89
- end
90
- end
91
- end
92
-
93
79
  end
94
80
 
95
81
  class UnknownOperator < StandardError
@@ -1,3 +1,3 @@
1
1
  module Entrez
2
- VERSION = "0.5.6"
2
+ VERSION = "0.5.7"
3
3
  end
@@ -0,0 +1,32 @@
1
+ module HTTParty
2
+
3
+ module ResponseIds
4
+
5
+ # For ESearch, add convenience method that parses ids and converts to array of integers.
6
+ # Only works if either no return mode is specified or if it is :xml.
7
+ def ids
8
+ if parse_ids?
9
+ return @ids if @ids
10
+ id_list = parsed_response['eSearchResult']['IdList']
11
+ if id_list
12
+ id_content = id_list['Id']
13
+ id_content = [id_content].flatten
14
+ @ids = id_content.map(&:to_i)
15
+ else
16
+ @ids = []
17
+ end
18
+ end
19
+ end
20
+
21
+ private
22
+
23
+ # Parse only if this is an ESearch request and in xml format.
24
+ def parse_ids?
25
+ esearch_request? && (retmode.nil? || xml?)
26
+ end
27
+
28
+ end
29
+
30
+ end
31
+
32
+ HTTParty::Response.send :include, HTTParty::ResponseIds
@@ -0,0 +1,22 @@
1
+ require 'httparty/response/ids'
2
+
3
+ module HTTParty
4
+ module ResponseExt
5
+
6
+ # Get the return mode from request.
7
+ def retmode
8
+ request.options[:query][:retmode]
9
+ end
10
+
11
+ def xml?
12
+ retmode == :xml
13
+ end
14
+
15
+ def esearch_request?
16
+ request.path.to_s.match /esearch/
17
+ end
18
+
19
+ end
20
+ end
21
+
22
+ HTTParty::Response.send :include, HTTParty::ResponseExt
@@ -8,7 +8,7 @@ describe Entrez do
8
8
  end
9
9
 
10
10
  it '#EFetch retrieves results' do
11
- response = Entrez.EFetch('taxonomy', id: 9606, retmode: :xml)
11
+ response = Entrez.EFetch('taxonomy', id: 9606)
12
12
  response.body.should include('Homo sapiens')
13
13
  end
14
14
 
@@ -18,39 +18,23 @@ describe Entrez do
18
18
  end
19
19
 
20
20
  it '#EInfo retrieves results' do
21
- response = Entrez.EInfo('snp', retmode: :xml)
21
+ response = Entrez.EInfo('snp')
22
22
  response.body.should include('<Name>RS</Name>')
23
23
  end
24
24
 
25
25
  context '#ESearch' do
26
26
 
27
27
  it 'retrieves results' do
28
- response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
28
+ response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'})
29
29
  response.body.should include('28911')
30
30
  end
31
31
 
32
- it 'response returns IDs for convenience' do
33
- response = Entrez.ESearch('genomeprj', {WORD: 'hapmap', SEQS: 'inprogress'}, retmode: :xml)
34
- response.ids.should == [60153, 29429, 28911, 48101, 59851, 59849, 59847, 59845, 59839, 59835, 59833, 59831, 51895, 59829, 59827, 60835, 59811, 60831, 60819, 33895]
35
- end
36
-
37
- it 'returns empty array if nothing found' do
38
- response = Entrez.ESearch('genomeprj', {NON_EXISTENT_SEARCH_FIELD: 'does not exist even in oompaloompa land'}, retmode: :xml)
39
- response.ids.should be_empty
40
- end
41
-
42
- it 'returns array even if only 1 id found' do
43
- id = 60153
44
- response = Entrez.ESearch('genomeprj', {uid: id}, retmode: :xml)
45
- response.ids.should == [id]
46
- end
47
-
48
32
  it 'accepts string as search_terms parameter' do
49
- response = Entrez.ESearch('genomeprj', 'hapmap[WORD]', retmode: :xml)
33
+ response = Entrez.ESearch('genomeprj', 'hapmap[WORD]')
50
34
  response.ids.should include(60153)
51
35
  end
52
36
 
53
- it 'can handle array of uids' do
37
+ it 'handles array of uids' do
54
38
  response = Entrez.ESearch('gene', {UID: [1, 2, 3]})
55
39
  response.ids.should =~ [1, 2, 3]
56
40
  end
@@ -58,8 +42,10 @@ describe Entrez do
58
42
  end
59
43
 
60
44
  it 'should respect query limit' do
61
- requests = proc { 4.times { Entrez.EFetch('taxonomy', id: 9606) } }
62
- requests.should take_longer_than(1.0)
45
+ fake_service :ESearch, 'esearch_empty.xml' do
46
+ requests = proc { 4.times { Entrez.ESearch('asdf') } }
47
+ requests.should take_longer_than(1.0)
48
+ end
63
49
  end
64
50
 
65
51
  it 'should convert search term hash into query string with AND operator by default' do
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
+ describe HTTParty::ResponseIds do
4
+
5
+ it 'parses body and returns IDs' do
6
+ fake_service :ESearch, 'esearch_1_2_3.xml' do
7
+ response = Entrez.ESearch('asdf')
8
+ response.ids.should == [1, 2, 3]
9
+ end
10
+ end
11
+
12
+ it 'returns empty array if nothing found' do
13
+ fake_service :ESearch, 'esearch_empty.xml' do
14
+ response = Entrez.ESearch('asdf')
15
+ response.ids.should be_empty
16
+ end
17
+ end
18
+
19
+ it 'returns array even if only 1 id found' do
20
+ fake_service :ESearch, 'esearch_1.xml' do
21
+ response = Entrez.ESearch('asdf')
22
+ response.ids.should == [1]
23
+ end
24
+ end
25
+
26
+ end
@@ -1,10 +1,12 @@
1
+ require 'awesome_print'
2
+ require 'fakeweb'
1
3
  require 'pathname'
2
4
 
3
5
  require File.join(Pathname(__FILE__).dirname.expand_path, '../lib/entrez')
4
-
5
- # require support .rb files.
6
+ Entrez.default_params(
7
+ retmode: :xml,
8
+ )
6
9
  Dir[File.expand_path("../support/**/*.rb", __FILE__)].each {|f| require f}
7
-
8
10
  RSpec.configure do |config|
9
11
  config.include(Macros)
10
12
  end
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" ?>
2
+ <!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
3
+ <eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
4
+ <Id>1</Id>
5
+ </IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" ?>
2
+ <!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
3
+ <eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
4
+ <Id>1</Id>
5
+ <Id>2</Id>
6
+ <Id>3</Id>
7
+ </IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" ?>
2
+ <!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
3
+ <eSearchResult><Count>41</Count><RetMax>20</RetMax><RetStart>0</RetStart><IdList>
4
+ </IdList><TranslationSet/><TranslationStack> <TermSet> <Term>hapmap[WORD]</Term> <Field>WORD</Field> <Count>3</Count> <Explode>Y</Explode> </TermSet> <TermSet> <Term>inprogress[SEQS]</Term> <Field>SEQS</Field> <Count>3913</Count> <Explode>Y</Explode> </TermSet> <OP>AND</OP> </TranslationStack><QueryTranslation>hapmap[WORD] AND inprogress[SEQS]</QueryTranslation></eSearchResult>
@@ -1,7 +1,7 @@
1
1
  module Macros
2
2
 
3
- def file_fixture(file_name)
4
- File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name)).read
3
+ def fixture_file(file_name)
4
+ File.open(File.join(File.dirname(__FILE__), 'fixtures/', file_name))
5
5
  end
6
6
 
7
7
  # Return how long it takes to run block.
@@ -12,4 +12,16 @@ module Macros
12
12
  end_time - start_time
13
13
  end
14
14
 
15
+ # Use FakeWeb to simulate Entrez service with contents of fixture file.
16
+ # Since the generated URL is a bit difficult to capture,
17
+ # the faked URI just matches a regular expression built from the service name.
18
+ # When block ends, clean registry.
19
+ def fake_service(service, fixture_file_name)
20
+ file_contents = fixture_file(fixture_file_name).read
21
+ FakeWeb.register_uri(:get, Regexp.new(service.to_s.downcase), body: file_contents, content_type: 'text/xml')
22
+ yield
23
+ ensure
24
+ FakeWeb.clean_registry
25
+ end
26
+
15
27
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: entrez
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.5.6
5
+ version: 0.5.7
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jared Ning
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-07-20 00:00:00 -05:00
13
+ date: 2011-08-15 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -25,16 +25,38 @@ dependencies:
25
25
  type: :runtime
26
26
  version_requirements: *id001
27
27
  - !ruby/object:Gem::Dependency
28
- name: rspec
28
+ name: awesome_print
29
29
  prerelease: false
30
30
  requirement: &id002 !ruby/object:Gem::Requirement
31
31
  none: false
32
32
  requirements:
33
- - - "="
33
+ - - ">="
34
34
  - !ruby/object:Gem::Version
35
- version: 2.4.0
35
+ version: "0"
36
36
  type: :development
37
37
  version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: fakeweb
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - "="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.3.0
47
+ type: :development
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: rspec
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - "="
56
+ - !ruby/object:Gem::Version
57
+ version: 2.6.0
58
+ type: :development
59
+ version_requirements: *id004
38
60
  description: Simple API for HTTP requests to Entrez E-utilities
39
61
  email:
40
62
  - jared@redningja.com
@@ -47,6 +69,7 @@ extra_rdoc_files: []
47
69
  files:
48
70
  - .gemtest
49
71
  - .gitignore
72
+ - .rspec
50
73
  - .rvmrc
51
74
  - Gemfile
52
75
  - README.rdoc
@@ -54,10 +77,16 @@ files:
54
77
  - entrez.gemspec
55
78
  - lib/entrez.rb
56
79
  - lib/entrez/version.rb
80
+ - lib/httparty/response/ids.rb
81
+ - lib/httparty/response_ext.rb
57
82
  - lib/query_string_normalizer.rb
58
83
  - spec/entrez_spec.rb
59
84
  - spec/query_string_normalizer_spec.rb
85
+ - spec/response_ids_spec.rb
60
86
  - spec/spec_helper.rb
87
+ - spec/support/fixtures/esearch_1.xml
88
+ - spec/support/fixtures/esearch_1_2_3.xml
89
+ - spec/support/fixtures/esearch_empty.xml
61
90
  - spec/support/macros.rb
62
91
  - spec/support/matchers.rb
63
92
  has_rdoc: true
@@ -91,6 +120,10 @@ summary: HTTP requests to Entrez E-utilities
91
120
  test_files:
92
121
  - spec/entrez_spec.rb
93
122
  - spec/query_string_normalizer_spec.rb
123
+ - spec/response_ids_spec.rb
94
124
  - spec/spec_helper.rb
125
+ - spec/support/fixtures/esearch_1.xml
126
+ - spec/support/fixtures/esearch_1_2_3.xml
127
+ - spec/support/fixtures/esearch_empty.xml
95
128
  - spec/support/macros.rb
96
129
  - spec/support/matchers.rb