biomart 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,8 @@
1
+ === 0.1.4 2010-01-20
2
+
3
+ * 1 minor enhancement:
4
+ * Added the ability to override the default timeout on requests.
5
+
1
6
  === 0.1.3 2009-10-30
2
7
 
3
8
  * 1 major bugfix:
@@ -12,6 +12,7 @@ lib/biomart/server.rb
12
12
  script/console
13
13
  script/destroy
14
14
  script/generate
15
- tasks/shoulda.rake
15
+ tasks/metrics.task
16
+ tasks/shoulda.task
16
17
  test/test_biomart.rb
17
18
  test/test_helper.rb
@@ -2,7 +2,7 @@
2
2
 
3
3
  http://github.com/dazoakley/biomart
4
4
 
5
- http://rubyforge.org/projects/biomart
5
+ http://rdoc.info/projects/dazoakley/biomart
6
6
 
7
7
  Biomart provides a simple interface for working with Biomart servers
8
8
  (see http://www.biomart.org for more info on Biomart itself), so you
@@ -92,13 +92,28 @@ Now all requests made through Biomart will be proxied via proxy.example.com.
92
92
  Alternatively you can also set your proxy url in the environment variable
93
93
  'http_proxy', and Biomart will automatically detect this.
94
94
 
95
+ == Setting a Timeout Value
96
+
97
+ If you need to change the default request timeout value, you can set it
98
+ globally via Biomart.timeout:
99
+
100
+ Biomart.timeout = 60 # For a 60 second timeout...
101
+
102
+ Or on a per-request basis by passing the :timeout option when searching:
103
+
104
+ kermits.search(
105
+ :filters => { "marker_symbol" => "Cbx1" },
106
+ :process_results => true,
107
+ :timeout => 60
108
+ )
109
+
95
110
  == Meta
96
111
 
97
112
  Written by Darren Oakley (daz dot oakley at gmail dot com)
98
113
 
99
114
  http://github.com/dazoakley/biomart
100
115
 
101
- http://rubyforge.org/projects/biomart
116
+ http://rdoc.info/projects/dazoakley/biomart
102
117
 
103
118
  == License
104
119
 
data/Rakefile CHANGED
@@ -22,7 +22,7 @@ $hoe = Hoe.spec "biomart" do
22
22
  end
23
23
 
24
24
  require "newgem/tasks"
25
- Dir["tasks/**/*.rake"].each { |t| load t }
25
+ Dir["tasks/*.task"].each { |t| load t }
26
26
 
27
27
  # TODO - want other tests/tasks run by default? Add them to the list
28
28
  # remove_task :default
@@ -2,15 +2,15 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{biomart}
5
- s.version = "0.1.3"
5
+ s.version = "0.1.4"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Darren Oakley"]
9
- s.date = %q{2009-10-30}
9
+ s.date = %q{2010-01-20}
10
10
  s.description = %q{A ruby API for interacting with Biomart services.}
11
11
  s.email = ["daz.oakley@gmail.com"]
12
12
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
13
- s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/shoulda.rake", "test/test_biomart.rb", "test/test_helper.rb"]
13
+ s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/metrics.task", "tasks/shoulda.task", "test/test_biomart.rb", "test/test_helper.rb"]
14
14
  s.homepage = %q{http://github.com/dazoakley/biomart}
15
15
  s.rdoc_options = ["--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -6,8 +6,16 @@ require "csv"
6
6
  require "rubygems"
7
7
  require "builder"
8
8
 
9
+ #begin
10
+ # require "curb"
11
+ # use_curb = true
12
+ #rescue LoadError
13
+ # use_curb = false
14
+ #end
15
+ #CURB_AVAILABLE = use_curb
16
+
9
17
  module Biomart
10
- VERSION = "0.1.3"
18
+ VERSION = "0.1.4"
11
19
 
12
20
  # This is the base Biomart error/exception class. Rescue it if
13
21
  # you want to catch any exceptions that this code might raise.
@@ -38,50 +46,122 @@ module Biomart
38
46
  # dataset.
39
47
  class DatasetError < BiomartError; end
40
48
 
41
- @@url = 'http://www.biomart.org/biomart/martservice'
42
- @@client = Net::HTTP
43
-
44
49
  # Centralised request function for handling all of the HTTP requests
45
50
  # to the biomart servers.
46
51
  def request( params={} )
47
- if Biomart.proxy or ENV['http_proxy']
48
- proxy_uri = Biomart.proxy
49
- proxy_uri ||= ENV['http_proxy']
50
- proxy = URI.parse( proxy_uri )
51
- @@client = Net::HTTP::Proxy( proxy.host, proxy.port )
52
- end
52
+ net_http_request(params)
53
53
 
54
- params[:url] = URI.escape(params[:url])
54
+ #if CURB_AVAILABLE and ( Biomart.use_net_http != true )
55
+ # curb_request(params)
56
+ #else
57
+ # net_http_request(params)
58
+ #end
59
+ end
60
+
61
+ class << self
62
+ attr_accessor :proxy, :timeout, :use_net_http
63
+ end
64
+
65
+ private
55
66
 
56
- if params[:method] === 'post'
57
- res = @@client.post_form( URI.parse(params[:url]), { "query" => params[:query] } )
58
- else
59
- res = @@client.get_response( URI.parse(params[:url]) )
67
+ # Utility function to perform the request method using the curb
68
+ # gem (a wrapper around libcurl) - supposed to be faster than
69
+ # Net::HTTP.
70
+ def curb_request( params={} )
71
+ client = Curl::Easy.new( params[:url] )
72
+
73
+ if Biomart.timeout or params[:timeout]
74
+ client.connect_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
75
+ end
76
+
77
+ if proxy_url() then client.proxy_url = proxy_url() end
78
+
79
+ case params[:method]
80
+ when 'post'
81
+ client.http_post( Curl::PostField.content( "query", params[:query], "text/xml" ) )
82
+ else
83
+ client.http_get
84
+ end
85
+
86
+ check_response( client.body_str, client.response_code )
87
+
88
+ return client.body_str
60
89
  end
61
90
 
62
- # Process the response code/body to catch errors.
63
- if res.code != "200"
64
- raise HTTPError.new(res.code), "HTTP error #{res.code}, please check your biomart server and URL settings."
65
- else
66
- if res.body =~ /ERROR/
67
- if res.body =~ /Filter (.+) NOT FOUND/
68
- raise FilterError.new(res.body), "Biomart error. Filter #{$1} not found."
69
- elsif res.body =~ /Attribute (.+) NOT FOUND/
70
- raise AttributeError.new(res.body), "Biomart error. Attribute #{$1} not found."
71
- elsif res.body =~ /Dataset (.+) NOT FOUND/
72
- raise DatasetError.new(res.body), "Biomart error. Dataset #{$1} not found."
73
- else
74
- raise BiomartError.new(res.body), "Biomart error."
91
+ # Utility function to perform the request method using Net::HTTP.
92
+ def net_http_request( params={} )
93
+ uri = URI.parse( params[:url] )
94
+ client = net_http_client()
95
+ req = nil
96
+ response = nil
97
+
98
+ case params[:method]
99
+ when 'post'
100
+ req = Net::HTTP::Post.new(uri.path)
101
+ req.form_data = { "query" => params[:query] }
102
+ else
103
+ req = Net::HTTP::Get.new(uri.request_uri)
104
+ end
105
+
106
+ client.start(uri.host, uri.port) do |http|
107
+ if Biomart.timeout or params[:timeout]
108
+ http.read_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
109
+ http.open_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
75
110
  end
111
+ response = http.request(req)
112
+ end
113
+
114
+ check_response( response.body, response.code )
115
+
116
+ return response.body
117
+ end
118
+
119
+ # Utility function to create a Net::HTTP object.
120
+ def net_http_client
121
+ client = Net::HTTP
122
+ if proxy_url()
123
+ proxy = URI.parse( proxy_url() )
124
+ client = Net::HTTP::Proxy( proxy.host, proxy.port )
76
125
  end
126
+ return client
77
127
  end
78
128
 
79
- return res.body
80
- end
81
-
82
- class << self
83
- attr_accessor :proxy
84
- end
129
+ # Utility function to determine if we need to use a proxy. If yes,
130
+ # returns the proxy url, if no, returns false.
131
+ def proxy_url
132
+ if Biomart.proxy or ENV['http_proxy'] or ENV['HTTP_PROXY']
133
+ proxy_uri = Biomart.proxy
134
+ proxy_uri ||= ENV['http_proxy']
135
+ proxy_uri ||= ENV['HTTP_PROXY']
136
+
137
+ return proxy_uri
138
+ else
139
+ return false
140
+ end
141
+ end
142
+
143
+ # Utility function to test the response from a http request.
144
+ # Raises errors if appropriate.
145
+ def check_response( body, code )
146
+ # Process the response code/body to catch errors.
147
+ if code.is_a?(String) then code = code.to_i end
148
+
149
+ if code != 200
150
+ raise HTTPError.new(code), "HTTP error #{code}, please check your biomart server and URL settings."
151
+ else
152
+ if body =~ /ERROR/
153
+ if body =~ /Filter (.+) NOT FOUND/
154
+ raise FilterError.new(body), "Biomart error. Filter #{$1} not found."
155
+ elsif body =~ /Attribute (.+) NOT FOUND/
156
+ raise AttributeError.new(body), "Biomart error. Attribute #{$1} not found."
157
+ elsif body =~ /Dataset (.+) NOT FOUND/
158
+ raise DatasetError.new(body), "Biomart error. Dataset #{$1} not found."
159
+ else
160
+ raise BiomartError.new(body), "Biomart error."
161
+ end
162
+ end
163
+ end
164
+ end
85
165
 
86
166
  end
87
167
 
@@ -64,9 +64,18 @@ module Biomart
64
64
  # optional arguments:
65
65
  #
66
66
  # :filters:: hash of key-value pairs (filter => search term)
67
+ # :timeout:: set a timeout length for the request (secs)
67
68
  def count( args={} )
68
- args.merge!({ :count => "1" })
69
- result = request( :method => 'post', :url => @url, :query => generate_xml(args) )
69
+ result = request(
70
+ :method => 'post',
71
+ :url => @url,
72
+ :timeout => args[:timeout],
73
+ :query => generate_xml(
74
+ :filters => args[:filters],
75
+ :attributes => args[:attributes],
76
+ :count => "1"
77
+ )
78
+ )
70
79
  return result.to_i
71
80
  end
72
81
 
@@ -77,6 +86,7 @@ module Biomart
77
86
  # :filters:: hash of key-value pairs (filter => search term)
78
87
  # :attributes:: array of attributes to retrieve
79
88
  # :process_results:: true/false - convert search results to object
89
+ # :timeout:: set a timeout length for the request (secs)
80
90
  #
81
91
  # By default will return a hash with the following:
82
92
  #
@@ -86,9 +96,17 @@ module Biomart
86
96
  # But with the :process_results option will return an array of hashes,
87
97
  # where each hash represents a row of results (keyed by the attribute name).
88
98
  def search( args={} )
89
- response = request( :method => 'post', :url => @url, :query => generate_xml(args) )
90
- result = process_tsv( args, response )
91
- result = conv_results_to_a_of_h( result ) if args[:process_results]
99
+ response = request(
100
+ :method => 'post',
101
+ :url => @url,
102
+ :timeout => args[:timeout],
103
+ :query => generate_xml(
104
+ :filters => args[:filters],
105
+ :attributes => args[:attributes]
106
+ )
107
+ )
108
+ result = process_tsv( args, response )
109
+ result = conv_results_to_a_of_h( result ) if args[:process_results]
92
110
  return result
93
111
  end
94
112
 
@@ -0,0 +1,23 @@
1
+ begin
2
+ require "metric_fu"
3
+ MetricFu::Configuration.run do |config|
4
+ config.metrics = [:churn, :saikuro, :flog, :flay, :reek, :roodi, :rcov]
5
+ config.graphs = [:flog, :flay, :reek, :roodi, :rcov]
6
+ config.flog = { :dirs_to_flog => ["lib"] }
7
+ config.flay = { :dirs_to_flay => ["lib"] }
8
+ config.reek = { :dirs_to_reek => ["lib"] }
9
+ config.roodi = { :dirs_to_roodi => ["lib"] }
10
+ config.rcov = {
11
+ :test_files => ["test/test_*.rb"],
12
+ :rcov_opts => [
13
+ "--sort coverage",
14
+ "--no-html",
15
+ "--text-coverage",
16
+ "--no-color",
17
+ "--profile",
18
+ "--exclude /gems/,/Library/,spec,features"
19
+ ]
20
+ }
21
+ end
22
+ rescue LoadError
23
+ end
File without changes
@@ -42,7 +42,7 @@ class BiomartTest < Test::Unit::TestCase
42
42
  setup do
43
43
  @htgt_targ = @htgt.datasets["htgt_targ"]
44
44
  @htgt_trap = @htgt.datasets["htgt_trap"]
45
- @kermits = @htgt.datasets["kermits"]
45
+ @kermits = @htgt.datasets["kermits"]
46
46
  end
47
47
 
48
48
  should "have basic metadata" do
@@ -63,34 +63,31 @@ class BiomartTest < Test::Unit::TestCase
63
63
  assert( @kermits.attributes["ensembl_gene_id"].is_a?( Biomart::Attribute ), "Biomart::Dataset is not creating Biomart::Attribute objects." )
64
64
  end
65
65
 
66
- should "perform count queries" do
67
- htgt_count = @htgt_targ.count()
68
- assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers." )
69
- assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong!" )
66
+ should "perform count/search queries" do
67
+ perform_count_queries("net/http")
68
+ perform_search_queries("net/http")
70
69
 
71
- htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
72
- assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers." )
73
- assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!" )
74
-
75
- htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
76
- assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers." )
77
- assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!" )
70
+ #if CURB_AVAILABLE
71
+ # perform_count_queries("curb")
72
+ # perform_search_queries("curb")
73
+ # Biomart.use_net_http = true
74
+ # perform_count_queries("net/http")
75
+ # perform_search_queries("net/http")
76
+ #else
77
+ # perform_count_queries("net/http")
78
+ # perform_search_queries("net/http")
79
+ #end
78
80
  end
79
81
 
80
- should "perform search queries" do
81
- search = @htgt_trap.search()
82
- assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
83
- assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
82
+ should "perform search queries whilst altering the timeout property" do
83
+ search = @htgt_trap.search( :timeout => 60 )
84
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options except per-request timeout) is not returning a hash." )
85
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options except per-request timeout) is not returning an array." )
84
86
 
85
- search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
86
- assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array." )
87
- assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes." )
88
- assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info." )
89
-
90
- search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
91
- assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array." )
92
- assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
93
- assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
87
+ Biomart.timeout = 60
88
+ search = @htgt_trap.search()
89
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options except global timeout) is not returning a hash." )
90
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options except global timeout) is not returning an array." )
94
91
  end
95
92
 
96
93
  should "handle search queries that will generate poorly formatted TSV data" do
@@ -100,17 +97,49 @@ class BiomartTest < Test::Unit::TestCase
100
97
  "is_eucomm", "is_komp_csd", "is_komp_regeneron", "is_norcomm",
101
98
  "is_mgp", "mgi_accession_id", "marker_symbol", "ensembl_gene_id",
102
99
  "status", "status_code", "status_type", "status_description",
103
- "status_sequence", "pipeline_stage", "htgt_project_id", "bac",
100
+ "status_sequence", "pipeline_stage", "ikmc_project_id", "bac",
104
101
  "design_id", "design_plate", "design_well", "intvec_plate",
105
102
  "intvec_well", "intvec_distribute", "targvec_plate", "targvec_well",
106
103
  "targvec_distribute", "backbone", "cassette", "allele_name",
107
- "escell_clone_name", "escell_distribute", "es_cell_line", "colonies_picked",
108
- "is_latest_for_gene", "is_targeted_non_cond"
104
+ "escell_clone", "escell_distribute", "escell_line", "colonies_picked",
105
+ "is_latest_for_gene", "targeted_trap"
109
106
  ]
110
107
  )
111
108
  assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
112
109
  assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
110
+ assert( search[:data].size > 0, "Biomart::Dataset.search[:data] for poorly formatted TSV data is empty." )
113
111
  end
112
+
113
+ end
114
+
115
+ def perform_count_queries( library )
116
+ htgt_count = @htgt_targ.count()
117
+ assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers. [using #{library} for HTTP communication]" )
118
+ assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong! [using #{library} for HTTP communication]" )
119
+
120
+ htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
121
+ assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers. [using #{library} for HTTP communication]" )
122
+ assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
123
+
124
+ htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
125
+ assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers. [using #{library} for HTTP communication]" )
126
+ assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
127
+ end
128
+
129
+ def perform_search_queries( library )
130
+ search = @htgt_trap.search()
131
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash. [using #{library} for HTTP communication]" )
132
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array. [using #{library} for HTTP communication]" )
133
+
134
+ search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
135
+ assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
136
+ assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
137
+ assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
138
+
139
+ search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
140
+ assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
141
+ assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
142
+ assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
114
143
  end
115
144
 
116
145
  context "The Biomart module" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biomart
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Oakley
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-30 00:00:00 +00:00
12
+ date: 2010-01-20 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -68,7 +68,8 @@ files:
68
68
  - script/console
69
69
  - script/destroy
70
70
  - script/generate
71
- - tasks/shoulda.rake
71
+ - tasks/metrics.task
72
+ - tasks/shoulda.task
72
73
  - test/test_biomart.rb
73
74
  - test/test_helper.rb
74
75
  has_rdoc: true