biomart 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,8 @@
1
+ === 0.1.4 2010-01-20
2
+
3
+ * 1 minor enhancement:
4
+ * Added the ability to override the default timeout on requests.
5
+
1
6
  === 0.1.3 2009-10-30
2
7
 
3
8
  * 1 major bugfix:
@@ -12,6 +12,7 @@ lib/biomart/server.rb
12
12
  script/console
13
13
  script/destroy
14
14
  script/generate
15
- tasks/shoulda.rake
15
+ tasks/metrics.task
16
+ tasks/shoulda.task
16
17
  test/test_biomart.rb
17
18
  test/test_helper.rb
@@ -2,7 +2,7 @@
2
2
 
3
3
  http://github.com/dazoakley/biomart
4
4
 
5
- http://rubyforge.org/projects/biomart
5
+ http://rdoc.info/projects/dazoakley/biomart
6
6
 
7
7
  Biomart provides a simple interface for working with Biomart servers
8
8
  (see http://www.biomart.org for more info on Biomart itself), so you
@@ -92,13 +92,28 @@ Now all requests made through Biomart will be proxied via proxy.example.com.
92
92
  Alternatively you can also set your proxy url in the environment variable
93
93
  'http_proxy', and Biomart will automatically detect this.
94
94
 
95
+ == Setting a Timeout Value
96
+
97
+ If you need to change the default request timeout value, you can set it
98
+ globally via Biomart.timeout:
99
+
100
+ Biomart.timeout = 60 # For a 60 second timeout...
101
+
102
+ Or on a per-request basis by passing the :timeout option when searching:
103
+
104
+ kermits.search(
105
+ :filters => { "marker_symbol" => "Cbx1" },
106
+ :process_results => true,
107
+ :timeout => 60
108
+ )
109
+
95
110
  == Meta
96
111
 
97
112
  Written by Darren Oakley (daz dot oakley at gmail dot com)
98
113
 
99
114
  http://github.com/dazoakley/biomart
100
115
 
101
- http://rubyforge.org/projects/biomart
116
+ http://rdoc.info/projects/dazoakley/biomart
102
117
 
103
118
  == License
104
119
 
data/Rakefile CHANGED
@@ -22,7 +22,7 @@ $hoe = Hoe.spec "biomart" do
22
22
  end
23
23
 
24
24
  require "newgem/tasks"
25
- Dir["tasks/**/*.rake"].each { |t| load t }
25
+ Dir["tasks/*.task"].each { |t| load t }
26
26
 
27
27
  # TODO - want other tests/tasks run by default? Add them to the list
28
28
  # remove_task :default
@@ -2,15 +2,15 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{biomart}
5
- s.version = "0.1.3"
5
+ s.version = "0.1.4"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Darren Oakley"]
9
- s.date = %q{2009-10-30}
9
+ s.date = %q{2010-01-20}
10
10
  s.description = %q{A ruby API for interacting with Biomart services.}
11
11
  s.email = ["daz.oakley@gmail.com"]
12
12
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
13
- s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/shoulda.rake", "test/test_biomart.rb", "test/test_helper.rb"]
13
+ s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/metrics.task", "tasks/shoulda.task", "test/test_biomart.rb", "test/test_helper.rb"]
14
14
  s.homepage = %q{http://github.com/dazoakley/biomart}
15
15
  s.rdoc_options = ["--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -6,8 +6,16 @@ require "csv"
6
6
  require "rubygems"
7
7
  require "builder"
8
8
 
9
+ #begin
10
+ # require "curb"
11
+ # use_curb = true
12
+ #rescue LoadError
13
+ # use_curb = false
14
+ #end
15
+ #CURB_AVAILABLE = use_curb
16
+
9
17
  module Biomart
10
- VERSION = "0.1.3"
18
+ VERSION = "0.1.4"
11
19
 
12
20
  # This is the base Biomart error/exception class. Rescue it if
13
21
  # you want to catch any exceptions that this code might raise.
@@ -38,50 +46,122 @@ module Biomart
38
46
  # dataset.
39
47
  class DatasetError < BiomartError; end
40
48
 
41
- @@url = 'http://www.biomart.org/biomart/martservice'
42
- @@client = Net::HTTP
43
-
44
49
  # Centralised request function for handling all of the HTTP requests
45
50
  # to the biomart servers.
46
51
  def request( params={} )
47
- if Biomart.proxy or ENV['http_proxy']
48
- proxy_uri = Biomart.proxy
49
- proxy_uri ||= ENV['http_proxy']
50
- proxy = URI.parse( proxy_uri )
51
- @@client = Net::HTTP::Proxy( proxy.host, proxy.port )
52
- end
52
+ net_http_request(params)
53
53
 
54
- params[:url] = URI.escape(params[:url])
54
+ #if CURB_AVAILABLE and ( Biomart.use_net_http != true )
55
+ # curb_request(params)
56
+ #else
57
+ # net_http_request(params)
58
+ #end
59
+ end
60
+
61
+ class << self
62
+ attr_accessor :proxy, :timeout, :use_net_http
63
+ end
64
+
65
+ private
55
66
 
56
- if params[:method] === 'post'
57
- res = @@client.post_form( URI.parse(params[:url]), { "query" => params[:query] } )
58
- else
59
- res = @@client.get_response( URI.parse(params[:url]) )
67
+ # Utility function to perform the request method using the curb
68
+ # gem (a wrapper around libcurl) - supposed to be faster than
69
+ # Net::HTTP.
70
+ def curb_request( params={} )
71
+ client = Curl::Easy.new( params[:url] )
72
+
73
+ if Biomart.timeout or params[:timeout]
74
+ client.connect_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
75
+ end
76
+
77
+ if proxy_url() then client.proxy_url = proxy_url() end
78
+
79
+ case params[:method]
80
+ when 'post'
81
+ client.http_post( Curl::PostField.content( "query", params[:query], "text/xml" ) )
82
+ else
83
+ client.http_get
84
+ end
85
+
86
+ check_response( client.body_str, client.response_code )
87
+
88
+ return client.body_str
60
89
  end
61
90
 
62
- # Process the response code/body to catch errors.
63
- if res.code != "200"
64
- raise HTTPError.new(res.code), "HTTP error #{res.code}, please check your biomart server and URL settings."
65
- else
66
- if res.body =~ /ERROR/
67
- if res.body =~ /Filter (.+) NOT FOUND/
68
- raise FilterError.new(res.body), "Biomart error. Filter #{$1} not found."
69
- elsif res.body =~ /Attribute (.+) NOT FOUND/
70
- raise AttributeError.new(res.body), "Biomart error. Attribute #{$1} not found."
71
- elsif res.body =~ /Dataset (.+) NOT FOUND/
72
- raise DatasetError.new(res.body), "Biomart error. Dataset #{$1} not found."
73
- else
74
- raise BiomartError.new(res.body), "Biomart error."
91
+ # Utility function to perform the request method using Net::HTTP.
92
+ def net_http_request( params={} )
93
+ uri = URI.parse( params[:url] )
94
+ client = net_http_client()
95
+ req = nil
96
+ response = nil
97
+
98
+ case params[:method]
99
+ when 'post'
100
+ req = Net::HTTP::Post.new(uri.path)
101
+ req.form_data = { "query" => params[:query] }
102
+ else
103
+ req = Net::HTTP::Get.new(uri.request_uri)
104
+ end
105
+
106
+ client.start(uri.host, uri.port) do |http|
107
+ if Biomart.timeout or params[:timeout]
108
+ http.read_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
109
+ http.open_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
75
110
  end
111
+ response = http.request(req)
112
+ end
113
+
114
+ check_response( response.body, response.code )
115
+
116
+ return response.body
117
+ end
118
+
119
+ # Utility function to create a Net::HTTP object.
120
+ def net_http_client
121
+ client = Net::HTTP
122
+ if proxy_url()
123
+ proxy = URI.parse( proxy_url() )
124
+ client = Net::HTTP::Proxy( proxy.host, proxy.port )
76
125
  end
126
+ return client
77
127
  end
78
128
 
79
- return res.body
80
- end
81
-
82
- class << self
83
- attr_accessor :proxy
84
- end
129
+ # Utility function to determine if we need to use a proxy. If so,
130
+ # returns the proxy url; otherwise, returns false.
131
+ def proxy_url
132
+ if Biomart.proxy or ENV['http_proxy'] or ENV['HTTP_PROXY']
133
+ proxy_uri = Biomart.proxy
134
+ proxy_uri ||= ENV['http_proxy']
135
+ proxy_uri ||= ENV['HTTP_PROXY']
136
+
137
+ return proxy_uri
138
+ else
139
+ return false
140
+ end
141
+ end
142
+
143
+ # Utility function to test the response from a http request.
144
+ # Raises errors if appropriate.
145
+ def check_response( body, code )
146
+ # Process the response code/body to catch errors.
147
+ if code.is_a?(String) then code = code.to_i end
148
+
149
+ if code != 200
150
+ raise HTTPError.new(code), "HTTP error #{code}, please check your biomart server and URL settings."
151
+ else
152
+ if body =~ /ERROR/
153
+ if body =~ /Filter (.+) NOT FOUND/
154
+ raise FilterError.new(body), "Biomart error. Filter #{$1} not found."
155
+ elsif body =~ /Attribute (.+) NOT FOUND/
156
+ raise AttributeError.new(body), "Biomart error. Attribute #{$1} not found."
157
+ elsif body =~ /Dataset (.+) NOT FOUND/
158
+ raise DatasetError.new(body), "Biomart error. Dataset #{$1} not found."
159
+ else
160
+ raise BiomartError.new(body), "Biomart error."
161
+ end
162
+ end
163
+ end
164
+ end
85
165
 
86
166
  end
87
167
 
@@ -64,9 +64,18 @@ module Biomart
64
64
  # optional arguments:
65
65
  #
66
66
  # :filters:: hash of key-value pairs (filter => search term)
67
+ # :timeout:: set a timeout length for the request (secs)
67
68
  def count( args={} )
68
- args.merge!({ :count => "1" })
69
- result = request( :method => 'post', :url => @url, :query => generate_xml(args) )
69
+ result = request(
70
+ :method => 'post',
71
+ :url => @url,
72
+ :timeout => args[:timeout],
73
+ :query => generate_xml(
74
+ :filters => args[:filters],
75
+ :attributes => args[:attributes],
76
+ :count => "1"
77
+ )
78
+ )
70
79
  return result.to_i
71
80
  end
72
81
 
@@ -77,6 +86,7 @@ module Biomart
77
86
  # :filters:: hash of key-value pairs (filter => search term)
78
87
  # :attributes:: array of attributes to retrieve
79
88
  # :process_results:: true/false - convert search results to object
89
+ # :timeout:: set a timeout length for the request (secs)
80
90
  #
81
91
  # By default will return a hash with the following:
82
92
  #
@@ -86,9 +96,17 @@ module Biomart
86
96
  # But with the :process_results option will return an array of hashes,
87
97
  # where each hash represents a row of results (keyed by the attribute name).
88
98
  def search( args={} )
89
- response = request( :method => 'post', :url => @url, :query => generate_xml(args) )
90
- result = process_tsv( args, response )
91
- result = conv_results_to_a_of_h( result ) if args[:process_results]
99
+ response = request(
100
+ :method => 'post',
101
+ :url => @url,
102
+ :timeout => args[:timeout],
103
+ :query => generate_xml(
104
+ :filters => args[:filters],
105
+ :attributes => args[:attributes]
106
+ )
107
+ )
108
+ result = process_tsv( args, response )
109
+ result = conv_results_to_a_of_h( result ) if args[:process_results]
92
110
  return result
93
111
  end
94
112
 
@@ -0,0 +1,23 @@
1
+ begin
2
+ require "metric_fu"
3
+ MetricFu::Configuration.run do |config|
4
+ config.metrics = [:churn, :saikuro, :flog, :flay, :reek, :roodi, :rcov]
5
+ config.graphs = [:flog, :flay, :reek, :roodi, :rcov]
6
+ config.flog = { :dirs_to_flog => ["lib"] }
7
+ config.flay = { :dirs_to_flay => ["lib"] }
8
+ config.reek = { :dirs_to_reek => ["lib"] }
9
+ config.roodi = { :dirs_to_roodi => ["lib"] }
10
+ config.rcov = {
11
+ :test_files => ["test/test_*.rb"],
12
+ :rcov_opts => [
13
+ "--sort coverage",
14
+ "--no-html",
15
+ "--text-coverage",
16
+ "--no-color",
17
+ "--profile",
18
+ "--exclude /gems/,/Library/,spec,features"
19
+ ]
20
+ }
21
+ end
22
+ rescue LoadError
23
+ end
File without changes
@@ -42,7 +42,7 @@ class BiomartTest < Test::Unit::TestCase
42
42
  setup do
43
43
  @htgt_targ = @htgt.datasets["htgt_targ"]
44
44
  @htgt_trap = @htgt.datasets["htgt_trap"]
45
- @kermits = @htgt.datasets["kermits"]
45
+ @kermits = @htgt.datasets["kermits"]
46
46
  end
47
47
 
48
48
  should "have basic metadata" do
@@ -63,34 +63,31 @@ class BiomartTest < Test::Unit::TestCase
63
63
  assert( @kermits.attributes["ensembl_gene_id"].is_a?( Biomart::Attribute ), "Biomart::Dataset is not creating Biomart::Attribute objects." )
64
64
  end
65
65
 
66
- should "perform count queries" do
67
- htgt_count = @htgt_targ.count()
68
- assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers." )
69
- assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong!" )
66
+ should "perform count/search queries" do
67
+ perform_count_queries("net/http")
68
+ perform_search_queries("net/http")
70
69
 
71
- htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
72
- assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers." )
73
- assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!" )
74
-
75
- htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
76
- assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers." )
77
- assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!" )
70
+ #if CURB_AVAILABLE
71
+ # perform_count_queries("curb")
72
+ # perform_search_queries("curb")
73
+ # Biomart.use_net_http = true
74
+ # perform_count_queries("net/http")
75
+ # perform_search_queries("net/http")
76
+ #else
77
+ # perform_count_queries("net/http")
78
+ # perform_search_queries("net/http")
79
+ #end
78
80
  end
79
81
 
80
- should "perform search queries" do
81
- search = @htgt_trap.search()
82
- assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
83
- assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
82
+ should "perform search queries whilst altering the timeout property" do
83
+ search = @htgt_trap.search( :timeout => 60 )
84
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options except per-request timeout) is not returning a hash." )
85
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options except per-request timeout) is not returning an array." )
84
86
 
85
- search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
86
- assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array." )
87
- assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes." )
88
- assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info." )
89
-
90
- search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
91
- assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array." )
92
- assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
93
- assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
87
+ Biomart.timeout = 60
88
+ search = @htgt_trap.search()
89
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options except global timeout) is not returning a hash." )
90
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options except global timeout) is not returning an array." )
94
91
  end
95
92
 
96
93
  should "handle search queries that will generate poorly formatted TSV data" do
@@ -100,17 +97,49 @@ class BiomartTest < Test::Unit::TestCase
100
97
  "is_eucomm", "is_komp_csd", "is_komp_regeneron", "is_norcomm",
101
98
  "is_mgp", "mgi_accession_id", "marker_symbol", "ensembl_gene_id",
102
99
  "status", "status_code", "status_type", "status_description",
103
- "status_sequence", "pipeline_stage", "htgt_project_id", "bac",
100
+ "status_sequence", "pipeline_stage", "ikmc_project_id", "bac",
104
101
  "design_id", "design_plate", "design_well", "intvec_plate",
105
102
  "intvec_well", "intvec_distribute", "targvec_plate", "targvec_well",
106
103
  "targvec_distribute", "backbone", "cassette", "allele_name",
107
- "escell_clone_name", "escell_distribute", "es_cell_line", "colonies_picked",
108
- "is_latest_for_gene", "is_targeted_non_cond"
104
+ "escell_clone", "escell_distribute", "escell_line", "colonies_picked",
105
+ "is_latest_for_gene", "targeted_trap"
109
106
  ]
110
107
  )
111
108
  assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
112
109
  assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
110
+ assert( search[:data].size > 0, "Biomart::Dataset.search[:data] for poorly formatted TSV data is empty." )
113
111
  end
112
+
113
+ end
114
+
115
+ def perform_count_queries( library )
116
+ htgt_count = @htgt_targ.count()
117
+ assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers. [using #{library} for HTTP communication]" )
118
+ assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong! [using #{library} for HTTP communication]" )
119
+
120
+ htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
121
+ assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers. [using #{library} for HTTP communication]" )
122
+ assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
123
+
124
+ htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
125
+ assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers. [using #{library} for HTTP communication]" )
126
+ assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
127
+ end
128
+
129
+ def perform_search_queries( library )
130
+ search = @htgt_trap.search()
131
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash. [using #{library} for HTTP communication]" )
132
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array. [using #{library} for HTTP communication]" )
133
+
134
+ search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
135
+ assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
136
+ assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
137
+ assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
138
+
139
+ search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
140
+ assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
141
+ assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
142
+ assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
114
143
  end
115
144
 
116
145
  context "The Biomart module" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biomart
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Oakley
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-30 00:00:00 +00:00
12
+ date: 2010-01-20 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -68,7 +68,8 @@ files:
68
68
  - script/console
69
69
  - script/destroy
70
70
  - script/generate
71
- - tasks/shoulda.rake
71
+ - tasks/metrics.task
72
+ - tasks/shoulda.task
72
73
  - test/test_biomart.rb
73
74
  - test/test_helper.rb
74
75
  has_rdoc: true