biomart 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,10 @@
1
+ === 0.2.0 2010-06-10
2
+
3
+ * 2 major enhancements:
4
+ * Added the ability to perform federated queries across two datasets.
5
+ * Added the option to discard rows of data if a specified list of
6
+ fields are not present.
7
+
1
8
  === 0.1.5 2010-02-04
2
9
 
3
10
  * 1 major bugfix:
@@ -12,7 +12,7 @@ lib/biomart/server.rb
12
12
  script/console
13
13
  script/destroy
14
14
  script/generate
15
- tasks/metrics.task
16
- tasks/shoulda.task
15
+ tasks/metrics.rake
16
+ tasks/shoulda.rake
17
17
  test/test_biomart.rb
18
18
  test/test_helper.rb
@@ -79,7 +79,57 @@ just get on with things...
79
79
  # etc. etc.
80
80
 
81
81
  See Biomart module and Class docs for more detail.
82
+
83
+ == Federated Searches
84
+
85
+ To perform a federated search across two datasets...
86
+
87
+ htgt = Biomart::Server.new( "http://www.sanger.ac.uk/htgt/biomart" )
88
+
89
+ res = htgt.datasets["htgt_targ"].search(
90
+ :filters => {
91
+ "status" => [
92
+ "Mice - Genotype confirmed",
93
+ "Mice - Germline transmission",
94
+ "Mice - Microinjection in progress",
95
+ "ES Cells - Targeting Confirmed"
96
+ ]
97
+ },
98
+ :attributes => [
99
+ "marker_symbol",
100
+ "mgi_accession_id",
101
+ "status"
102
+ ],
103
+ :federate => [
104
+ {
105
+ :dataset => htgt.datasets["mmusculus_gene_ensembl"],
106
+ :filters => {
107
+ "chromosome_name" => "1",
108
+ "start" => "1",
109
+ "end" => "10000000"
110
+ },
111
+ :attributes => []
112
+ }
113
+ ]
114
+ )
82
115
 
116
+ The above will perform a federated query for all genes with available
117
+ mouse knockout ES cells in the first 10Mb of chromosome 1 from the IKMC
118
+ projects (not really important, but it's an example of a complex query).
119
+
120
+ The basic search arguments are the same as if we were searching across
121
+ a single dataset, with the addition of the :federate option, which is
122
+ an array of hashes for each dataset (and additional filters/attributes)
123
+ that we want to federate our search with.
124
+
125
+ Note: at present you can only federate across two datasets, this is
126
+ a limitation in the current stable release of biomart (0.7). If you try to
127
+ federate across more than two datasets, a Biomart::ArgumentError will
128
+ be raised. This limitation shall be removed from this API when it is
129
+ possible to federate across more than two datasets in biomart itself.
130
+
131
+ Count queries are only allowed on single datasets.
132
+
83
133
  == Using a Proxy
84
134
 
85
135
  If you need to channel all of your requests via a proxy, specify your
data/Rakefile CHANGED
@@ -1,5 +1,8 @@
1
1
  require "rubygems"
2
+
3
+ gem "flog", "= 2.2.0"
2
4
  gem "hoe", ">= 2.1.0"
5
+
3
6
  require "hoe"
4
7
  require "fileutils"
5
8
  require "./lib/biomart"
@@ -15,14 +18,14 @@ $hoe = Hoe.spec "biomart" do
15
18
  self.rubyforge_name = self.name
16
19
  self.url = "http://github.com/dazoakley/biomart"
17
20
  self.summary = "A ruby API for interacting with Biomart services."
18
- self.description = "A ruby API for interacting with Biomart services."
21
+ self.description = "A ruby API for interacting with Biomart XML based webservices."
19
22
  self.extra_deps = [["builder",">= 0"]]
20
- self.extra_dev_deps = [["thoughtbot-shoulda",">=0"]]
23
+ self.extra_dev_deps = [["shoulda",">= 2.10"]]
21
24
  self.extra_rdoc_files = ["README.rdoc"]
22
25
  end
23
26
 
24
27
  require "newgem/tasks"
25
- Dir["tasks/*.task"].each { |t| load t }
28
+ Dir["tasks/*.rake"].each { |t| load t }
26
29
 
27
30
  # TODO - want other tests/tasks run by default? Add them to the list
28
31
  # remove_task :default
@@ -2,12 +2,12 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{biomart}
5
- s.version = "0.1.5"
5
+ s.version = "0.2.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Darren Oakley"]
9
- s.date = %q{2010-02-04}
10
- s.description = %q{A ruby API for interacting with Biomart services.}
9
+ s.date = %q{2010-06-10}
10
+ s.description = %q{A ruby API for interacting with Biomart XML based webservices.}
11
11
  s.email = ["daz.oakley@gmail.com"]
12
12
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
13
13
  s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/metrics.task", "tasks/shoulda.task", "test/test_biomart.rb", "test/test_helper.rb"]
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
15
15
  s.rdoc_options = ["--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
17
17
  s.rubyforge_project = %q{biomart}
18
- s.rubygems_version = %q{1.3.5}
18
+ s.rubygems_version = %q{1.3.7}
19
19
  s.summary = %q{A ruby API for interacting with Biomart services.}
20
20
  s.test_files = ["test/test_biomart.rb", "test/test_helper.rb"]
21
21
 
@@ -23,14 +23,18 @@ Gem::Specification.new do |s|
23
23
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
24
  s.specification_version = 3
25
25
 
26
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
26
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
27
27
  s.add_runtime_dependency(%q<builder>, [">= 0"])
28
- s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
29
- s.add_development_dependency(%q<hoe>, [">= 2.3.3"])
28
+ s.add_development_dependency(%q<shoulda>, [">= 2.10"])
29
+ s.add_development_dependency(%q<hoe>, [">= 2.6.1"])
30
30
  else
31
31
  s.add_dependency(%q<builder>, [">= 0"])
32
+ s.add_dependency(%q<shoulda>, [">= 2.10"])
33
+ s.add_dependency(%q<hoe>, [">= 2.6.1"])
32
34
  end
33
35
  else
34
36
  s.add_dependency(%q<builder>, [">= 0"])
37
+ s.add_dependency(%q<shoulda>, [">= 2.10"])
38
+ s.add_dependency(%q<hoe>, [">= 2.6.1"])
35
39
  end
36
40
  end
@@ -1,21 +1,14 @@
1
1
  require "uri"
2
2
  require "net/http"
3
+ require "cgi"
3
4
  require "rexml/document"
4
5
  require "csv"
5
6
 
6
7
  require "rubygems"
7
8
  require "builder"
8
9
 
9
- #begin
10
- # require "curb"
11
- # use_curb = true
12
- #rescue LoadError
13
- # use_curb = false
14
- #end
15
- #CURB_AVAILABLE = use_curb
16
-
17
10
  module Biomart
18
- VERSION = "0.1.5"
11
+ VERSION = "0.2.0"
19
12
 
20
13
  # This is the base Biomart error/exception class. Rescue it if
21
14
  # you want to catch any exceptions that this code might raise.
@@ -46,76 +39,49 @@ module Biomart
46
39
  # dataset.
47
40
  class DatasetError < BiomartError; end
48
41
 
42
+ # Error class representing errors in the arguments being passed
43
+ # to the api.
44
+ class ArgumentError < BiomartError; end
45
+
49
46
  # Centralised request function for handling all of the HTTP requests
50
47
  # to the biomart servers.
51
48
  def request( params={} )
52
- net_http_request(params)
49
+ if params[:url] =~ / /
50
+ params[:url].gsub!(" ","+")
51
+ end
52
+
53
+ uri = URI.parse( params[:url] )
54
+ client = net_http_client()
55
+ req = nil
56
+ response = nil
57
+
58
+ case params[:method]
59
+ when 'post'
60
+ req = Net::HTTP::Post.new(uri.path)
61
+ req.form_data = { "query" => params[:query] }
62
+ else
63
+ req = Net::HTTP::Get.new(uri.request_uri)
64
+ end
65
+
66
+ client.start(uri.host, uri.port) do |http|
67
+ if Biomart.timeout or params[:timeout]
68
+ http.read_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
69
+ http.open_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
70
+ end
71
+ response = http.request(req)
72
+ end
53
73
 
54
- #if CURB_AVAILABLE and ( Biomart.use_net_http != true )
55
- # curb_request(params)
56
- #else
57
- # net_http_request(params)
58
- #end
74
+ check_response( response.body, response.code )
75
+
76
+ return response.body
59
77
  end
60
78
 
61
79
  class << self
62
- attr_accessor :proxy, :timeout, :use_net_http
80
+ attr_accessor :proxy, :timeout
63
81
  end
64
82
 
65
83
  private
66
84
 
67
- # Utility function to perform the request method using the curb
68
- # gem (a wrapper around libcurl) - supposed to be faster than
69
- # Net::HTTP.
70
- def curb_request( params={} )
71
- client = Curl::Easy.new( params[:url] )
72
-
73
- if Biomart.timeout or params[:timeout]
74
- client.connect_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
75
- end
76
-
77
- if proxy_url() then client.proxy_url = proxy_url() end
78
-
79
- case params[:method]
80
- when 'post'
81
- client.http_post( Curl::PostField.content( "query", params[:query], "text/xml" ) )
82
- else
83
- client.http_get
84
- end
85
-
86
- check_response( client.body_str, client.response_code )
87
-
88
- return client.body_str
89
- end
90
-
91
- # Utility function to perform the request method using Net::HTTP.
92
- def net_http_request( params={} )
93
- uri = URI.parse( params[:url] )
94
- client = net_http_client()
95
- req = nil
96
- response = nil
97
-
98
- case params[:method]
99
- when 'post'
100
- req = Net::HTTP::Post.new(uri.path)
101
- req.form_data = { "query" => params[:query] }
102
- else
103
- req = Net::HTTP::Get.new(uri.request_uri)
104
- end
105
-
106
- client.start(uri.host, uri.port) do |http|
107
- if Biomart.timeout or params[:timeout]
108
- http.read_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
109
- http.open_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
110
- end
111
- response = http.request(req)
112
- end
113
-
114
- check_response( response.body, response.code )
115
-
116
- return response.body
117
- end
118
-
119
85
  # Utility function to create a Net::HTTP object.
120
86
  def net_http_client
121
87
  client = Net::HTTP
@@ -2,13 +2,26 @@ module Biomart
2
2
  # Class representation for a biomart attribute.
3
3
  # Will belong to a Biomart::Dataset.
4
4
  class Attribute
5
- attr_reader :name, :display_name, :default
5
+ attr_reader :name, :display_name
6
6
 
7
7
  def initialize(args)
8
8
  @name = args["internalName"]
9
9
  @display_name = args["displayName"]
10
10
  @default = args["default"] ? true : false
11
+ @hidden = args["hideDisplay"] ? true : false
11
12
  end
12
13
 
14
+ # Convenience method to see if this attribute is hidden from
15
+ # the standard MartView interface. Returns true/false.
16
+ def hidden?
17
+ @hidden
18
+ end
19
+
20
+ # Convenience method to see if this attribute would be
21
+ # enabled by default in the standard MartView interface.
22
+ # Returns true/false.
23
+ def default?
24
+ @default
25
+ end
13
26
  end
14
27
  end
@@ -36,6 +36,12 @@ module Biomart
36
36
  return @datasets
37
37
  end
38
38
 
39
+ # Returns true / false if this database is visible in the
40
+ # default MartView interface.
41
+ def visible?
42
+ @visible
43
+ end
44
+
39
45
  private
40
46
 
41
47
  # Utility method to do the webservice call to the biomart server
@@ -62,10 +62,20 @@ module Biomart
62
62
  # the result of the count query.
63
63
  #
64
64
  # optional arguments:
65
- #
66
- # :filters:: hash of key-value pairs (filter => search term)
67
- # :timeout:: set a timeout length for the request (secs)
65
+ #
66
+ # {
67
+ # :timeout => integer, # set a timeout length for the request (secs)
68
+ # :filters => {} # hash of key-value pairs (filter => search term)
69
+ # }
68
70
  def count( args={} )
71
+ if args[:federate]
72
+ raise Biomart::ArgumentError, "You cannot federate a count query."
73
+ end
74
+
75
+ if args[:required_attributes]
76
+ raise Biomart::ArgumentError, "The :required_attributes option is not allowed on count queries."
77
+ end
78
+
69
79
  result = request(
70
80
  :method => 'post',
71
81
  :url => @url,
@@ -82,30 +92,51 @@ module Biomart
82
92
  # Function to perform a Biomart search.
83
93
  #
84
94
  # optional arguments:
95
+ #
96
+ # {
97
+ # :process_results => true/false, # convert search results to object
98
+ # :timeout => integer, # set a timeout length for the request (secs)
99
+ # :filters => {}, # hash of key-value pairs (filter => search term)
100
+ # :attributes => [], # array of attributes to retrieve
101
+ # :required_attributes => [], # array of attributes that are required
102
+ # :federate => [
103
+ # {
104
+ # :dataset => Biomart::Dataset, # A dataset object to federate with
105
+ # :filters => {}, # hash of key-value pairs (filter => search term)
106
+ # :attributes => [] # array of attributes to retrieve
107
+ # }
108
+ # ]
109
+ # }
110
+ #
111
+ # Note, if you do not pass any filters or attributes arguments, the defaults
112
+ # for the dataset shall be used.
85
113
  #
86
- # :filters:: hash of key-value pairs (filter => search term)
87
- # :attributes:: array of attributes to retrieve
88
- # :process_results:: true/false - convert search results to object
89
- # :timeout:: set a timeout length for the request (secs)
114
+ # Also, using the :required_attributes option - this performs AND logic and will require
115
+ # data to be present in all of the listed attributes in order for a row to be returned.
90
116
  #
91
117
  # By default will return a hash with the following:
92
118
  #
93
- # :headers:: array of headers
94
- # :data:: array of arrays containing search results
119
+ # {
120
+ # :headers => [], # array of headers
121
+ # :data => [] # array of arrays containing search results
122
+ # }
95
123
  #
96
124
  # But with the :process_results option will return an array of hashes,
97
125
  # where each hash represents a row of results (keyed by the attribute name).
98
126
  def search( args={} )
127
+ if args[:required_attributes] and !args[:required_attributes].is_a?(Array)
128
+ raise Biomart::ArgumentError, "The :required_attributes option must be passed as an array."
129
+ end
130
+
99
131
  response = request(
100
132
  :method => 'post',
101
133
  :url => @url,
102
134
  :timeout => args[:timeout],
103
- :query => generate_xml(
104
- :filters => args[:filters],
105
- :attributes => args[:attributes]
106
- )
135
+ :query => generate_xml( process_xml_args(args) )
107
136
  )
137
+
108
138
  result = process_tsv( args, response )
139
+ result = filter_data_rows( args, result ) if args[:required_attributes]
109
140
  result = conv_results_to_a_of_h( result ) if args[:process_results]
110
141
  return result
111
142
  end
@@ -118,38 +149,22 @@ module Biomart
118
149
  xml.instruct!
119
150
  xml.declare!( :DOCTYPE, :Query )
120
151
  xml.Query( :virtualSchemaName => "default", :formatter => "TSV", :header => "0", :uniqueRows => "1", :count => args[:count], :datasetConfigVersion => "0.6" ) {
121
- xml.Dataset( :name => @name, :interface => "default" ) {
122
-
123
- if args[:filters]
124
- args[:filters].each do |name,value|
125
- if value.is_a? Array
126
- value = value.join(",")
127
- end
128
- xml.Filter( :name => name, :value => value )
129
- end
130
- else
131
- self.filters.each do |name,filter|
132
- if filter.default
133
- xml.Filter( :name => name, :value => filter.default_value )
134
- end
135
- end
136
- end
137
-
138
- unless args[:count]
139
- if args[:attributes]
140
- args[:attributes].each do |name|
141
- xml.Attribute( :name => name )
142
- end
143
- else
144
- self.attributes.each do |name,attribute|
145
- if attribute.default
146
- xml.Attribute( :name => name )
147
- end
148
- end
152
+ dataset_xml( xml, self, { :filters => args[:filters], :attributes => args[:attributes] } )
153
+
154
+ if args[:federate]
155
+ args[:federate].each do |joined_dataset|
156
+ unless joined_dataset[:dataset].is_a?(Biomart::Dataset)
157
+ raise Biomart::ArgumentError, "You must pass a Biomart::Dataset object to the :federate[:dataset] option."
149
158
  end
159
+
160
+ dataset_xml(
161
+ xml,
162
+ joined_dataset[:dataset],
163
+ { :filters => joined_dataset[:filters], :attributes => joined_dataset[:attributes] }
164
+ )
150
165
  end
151
-
152
- }
166
+ end
167
+
153
168
  }
154
169
 
155
170
  return biomart_xml
@@ -190,21 +205,77 @@ module Biomart
190
205
  end
191
206
  end
192
207
 
208
+ # Utility function to process and test the arguments passed for
209
+ # the xml query.
210
+ def process_xml_args( args={} )
211
+ xml_args = {
212
+ :filters => args[:filters],
213
+ :attributes => args[:attributes]
214
+ }
215
+
216
+ if args[:federate]
217
+ unless args[:federate].is_a?(Array)
218
+ raise Biomart::ArgumentError, "The :federate option must be passed as an array."
219
+ end
220
+
221
+ unless args[:federate].size == 1
222
+ raise Biomart::ArgumentError, "Sorry, we can only federate two datasets at present. This limitation shall be lifted in version 0.8 of biomart."
223
+ end
224
+
225
+ xml_args[:federate] = args[:federate]
226
+ end
227
+
228
+ return xml_args
229
+ end
230
+
231
+ # Helper function to produce the portion of the biomart xml for
232
+ # a dataset query.
233
+ def dataset_xml( xml, dataset, args )
234
+ xml.Dataset( :name => dataset.name, :interface => "default" ) {
235
+
236
+ if args[:filters]
237
+ args[:filters].each do |name,value|
238
+ if value.is_a? Array
239
+ value = value.join(",")
240
+ end
241
+ xml.Filter( :name => name, :value => value )
242
+ end
243
+ else
244
+ dataset.filters.each do |name,filter|
245
+ if filter.default?
246
+ xml.Filter( :name => name, :value => filter.default_value )
247
+ end
248
+ end
249
+ end
250
+
251
+ unless args[:count]
252
+ if args[:attributes]
253
+ args[:attributes].each do |name|
254
+ xml.Attribute( :name => name )
255
+ end
256
+ else
257
+ dataset.attributes.each do |name,attribute|
258
+ if attribute.default?
259
+ xml.Attribute( :name => name )
260
+ end
261
+ end
262
+ end
263
+ end
264
+
265
+ }
266
+ end
267
+
193
268
  # Utility function to transform the tab-separated data retrieved
194
269
  # from the Biomart search query into a ruby object.
195
270
  def process_tsv( args, tsv )
196
271
  headers = []
197
272
  parsed_data = []
273
+
274
+ append_header_attributes_for_tsv( headers, self, args[:attributes] )
198
275
 
199
- if args[:attributes]
200
- args[:attributes].each do |attribute|
201
- headers.push(attribute)
202
- end
203
- else
204
- self.attributes.each do |name,attribute|
205
- if attribute.default
206
- headers.push(name)
207
- end
276
+ if args[:federate]
277
+ args[:federate].each do |joined_dataset|
278
+ append_header_attributes_for_tsv( headers, joined_dataset[:dataset], joined_dataset[:attributes] )
208
279
  end
209
280
  end
210
281
 
@@ -231,6 +302,22 @@ module Biomart
231
302
  }
232
303
  end
233
304
 
305
+ # Helper function to append the attribute names to the 'headers' array
306
+ # for processing the returned results.
307
+ def append_header_attributes_for_tsv( headers, dataset, attributes )
308
+ if attributes
309
+ attributes.each do |attribute|
310
+ headers.push(attribute)
311
+ end
312
+ else
313
+ dataset.attributes.each do |name,attribute|
314
+ if attribute.default?
315
+ headers.push(name)
316
+ end
317
+ end
318
+ end
319
+ end
320
+
234
321
  # Utility function to process TSV formatted data that raises errors. (Biomart
235
322
  # has a habit of serving out this...) First attempts to use the CSV modules
236
323
  # 'parse_line' function to read in the data, if that fails, tries to use split
@@ -301,6 +388,46 @@ module Biomart
301
388
 
302
389
  return result_objects
303
390
  end
304
-
391
+
392
+ # Utility function to remove data rows from a search result that do not include
393
+ # the :required_attributes.
394
+ def filter_data_rows( args, result )
395
+ # Get the list of attributes searched for...
396
+ attributes = args[:attributes] ? args[:attributes] : []
397
+ if attributes.empty?
398
+ self.attributes.each do |name,attribute|
399
+ if attribute.default?
400
+ attributes.push(name)
401
+ end
402
+ end
403
+ end
404
+
405
+ # Work out which attribute positions we need to test...
406
+ positions_to_test = []
407
+ attributes.each_index do |index|
408
+ if args[:required_attributes].include?(attributes[index])
409
+ positions_to_test.push(index)
410
+ end
411
+ end
412
+
413
+ # Now go through the results and filter out the unwanted data...
414
+ filtered_data = []
415
+ result[:data].each do |data_row|
416
+ save_row_count = 0
417
+
418
+ positions_to_test.each do |position|
419
+ save_row_count = save_row_count + 1 unless data_row[position].nil?
420
+ end
421
+
422
+ if save_row_count == positions_to_test.size
423
+ filtered_data.push(data_row)
424
+ end
425
+ end
426
+
427
+ return {
428
+ :headers => result[:headers],
429
+ :data => filtered_data
430
+ }
431
+ end
305
432
  end
306
433
  end
@@ -2,14 +2,36 @@ module Biomart
2
2
  # Class representation for a biomart filter.
3
3
  # Will belong to a Biomart::Dataset.
4
4
  class Filter
5
- attr_reader :name, :display_name, :default, :default_value
5
+ attr_reader :name, :display_name, :default_value, :qualifier, :type
6
6
 
7
7
  def initialize(args)
8
- @name = args["internalName"]
9
- @display_name = args["displayName"]
10
- @default = args["defaultOn"] ? true : false
11
- @default_value = args["defaultValue"]
8
+ @name = args["internalName"]
9
+ @display_name = args["displayName"]
10
+ @default = args["defaultOn"] ? true : false
11
+ @default_value = args["defaultValue"]
12
+ @hidden = args["hideDisplay"] ? true : false
13
+ @qualifier = args["qualifier"]
14
+ @type = args["type"]
15
+ @multiple_values = args["multipleValues"] ? true : false
12
16
  end
13
17
 
18
+ # Convenience method to see if this filter is hidden from
19
+ # the standard MartView interface. Returns true/false.
20
+ def hidden?
21
+ @hidden
22
+ end
23
+
24
+ # Convenience method to see if this filter would be
25
+ # enabled by default in the standard MartView interface.
26
+ # Returns true/false.
27
+ def default?
28
+ @default
29
+ end
30
+
31
+ # Convenience method to see if this filter allows multiple
32
+ # values to be passed to it.
33
+ def multiple_values?
34
+ @multiple_values
35
+ end
14
36
  end
15
37
  end
@@ -73,9 +73,7 @@ module Biomart
73
73
  url = @url + '?type=registry'
74
74
  document = REXML::Document.new( request( :url => url ) )
75
75
  REXML::XPath.each( document, "//MartURLLocation" ) do |d|
76
- if d.attributes["visible"] === "1"
77
- @databases[ d.attributes["name"] ] = Database.new( @url, d.attributes )
78
- end
76
+ @databases[ d.attributes["name"] ] = Database.new( @url, d.attributes )
79
77
  end
80
78
  end
81
79
 
File without changes
File without changes
@@ -26,9 +26,11 @@ class BiomartTest < Test::Unit::TestCase
26
26
  end
27
27
 
28
28
  should "have basic metadata" do
29
+ true_false = [true,false]
29
30
  assert( @htgt_database.display_name, "Biomart::Database does not have a 'display name'." )
30
31
  assert( @htgt_database.name, "Biomart::Database does not have a 'name'." )
31
32
  assert( @htgt_database.visible != nil, "Biomart::Database does not have a 'visible' flag." )
33
+ assert( true_false.include?( @htgt_database.visible? ), "Biomart::Database.visible? is not returning true/false." )
32
34
  end
33
35
 
34
36
  should "have datasets" do
@@ -43,7 +45,9 @@ class BiomartTest < Test::Unit::TestCase
43
45
  @htgt_targ = @htgt.datasets["htgt_targ"]
44
46
  @htgt_trap = @htgt.datasets["htgt_trap"]
45
47
  @kermits = @htgt.datasets["kermits"]
48
+ @ensembl = @htgt.datasets["mmusculus_gene_ensembl"]
46
49
  @emma = Biomart::Dataset.new( "http://www.emmanet.org/biomart", { :name => "strains" } )
50
+ @dcc = Biomart::Dataset.new( "http://www.i-dcc.org/biomart", { :name => "dcc" } )
47
51
  end
48
52
 
49
53
  should "have basic metadata" do
@@ -64,20 +68,34 @@ class BiomartTest < Test::Unit::TestCase
64
68
  assert( @kermits.attributes["ensembl_gene_id"].is_a?( Biomart::Attribute ), "Biomart::Dataset is not creating Biomart::Attribute objects." )
65
69
  end
66
70
 
67
- should "perform count/search queries" do
68
- perform_count_queries("net/http")
69
- perform_search_queries("net/http")
70
-
71
- #if CURB_AVAILABLE
72
- # perform_count_queries("curb")
73
- # perform_search_queries("curb")
74
- # Biomart.use_net_http = true
75
- # perform_count_queries("net/http")
76
- # perform_search_queries("net/http")
77
- #else
78
- # perform_count_queries("net/http")
79
- # perform_search_queries("net/http")
80
- #end
71
+ should "perform count queries" do
72
+ htgt_count = @htgt_targ.count()
73
+ assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers." )
74
+ assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong!" )
75
+
76
+ htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
77
+ assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers." )
78
+ assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!" )
79
+
80
+ htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
81
+ assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers." )
82
+ assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!" )
83
+ end
84
+
85
+ should "perform search queries" do
86
+ search = @htgt_trap.search()
87
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
88
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
89
+
90
+ search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
91
+ assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array." )
92
+ assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes." )
93
+ assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info." )
94
+
95
+ search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
96
+ assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array." )
97
+ assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
98
+ assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
81
99
  end
82
100
 
83
101
  should "perform search queries whilst altering the timeout property" do
@@ -124,36 +142,151 @@ class BiomartTest < Test::Unit::TestCase
124
142
  assert( search2[:data].size > 0, "Biomart::Dataset.search[:data] for poorly formatted TSV data is empty. (EMMA Query)" )
125
143
  end
126
144
 
145
+ should "perform federated search queries" do
146
+ search_opts = {
147
+ :filters => {
148
+ "status" => [
149
+ "Mice - Genotype confirmed", "Mice - Germline transmission",
150
+ "Mice - Microinjection in progress", "ES Cells - Targeting Confirmed"
151
+ ]
152
+ },
153
+ :attributes => [ "marker_symbol", "mgi_accession_id", "status" ],
154
+ :federate => [
155
+ {
156
+ :dataset => @ensembl,
157
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
158
+ :attributes => []
159
+ }
160
+ ]
161
+ }
162
+
163
+ results = @htgt_targ.search( search_opts )
164
+
165
+ assert( results.is_a?(Hash), "Biomart::Dataset.search is not returning a hash. [federated search]" )
166
+ assert( results[:data].is_a?(Array), "Biomart::Dataset.search[:data] is not returning an array. [federated search]" )
167
+ assert( results[:data][0].size === 3, "Biomart::Dataset.search[:data] is not returning 3 attributes. [federated search]" )
168
+ assert( results[:headers].size === 3, "Biomart::Dataset.search[:headers] is not returning 3 elements. [federated search]" )
169
+
170
+ assert_raise( Biomart::ArgumentError ) { @htgt_targ.count( search_opts ) }
171
+
172
+ assert_raise Biomart::ArgumentError do
173
+ search_opts[:federate] = [
174
+ {
175
+ :dataset => "mmusculus_gene_ensembl",
176
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
177
+ :attributes => []
178
+ }
179
+ ]
180
+ results = @htgt_targ.search( search_opts )
181
+ end
182
+
183
+ assert_raise Biomart::ArgumentError do
184
+ search_opts[:federate] = {
185
+ :dataset => "mmusculus_gene_ensembl",
186
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
187
+ :attributes => []
188
+ }
189
+ results = @htgt_targ.search( search_opts )
190
+ end
191
+
192
+ assert_raise Biomart::ArgumentError do
193
+ search_opts[:federate] = [
194
+ {
195
+ :dataset => @ensembl,
196
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
197
+ :attributes => []
198
+ },
199
+ {
200
+ :dataset => @ensembl,
201
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
202
+ :attributes => []
203
+ }
204
+ ]
205
+ results = @htgt_targ.search( search_opts )
206
+ end
207
+ end
208
+
209
+ should "perform search queries with the :required_attributes option" do
210
+ search_opts = {
211
+ :filters => {
212
+ "chromosome_name" => "1",
213
+ "start" => "1",
214
+ "end" => "10000000"
215
+ },
216
+ :attributes => [
217
+ "ensembl_gene_id", "ensembl_transcript_id",
218
+ "mouse_paralog_ensembl_gene", "mouse_paralog_chromosome"
219
+ ],
220
+ :required_attributes => ["mouse_paralog_ensembl_gene"]
221
+ }
222
+
223
+ results = @ensembl.search( search_opts )
224
+
225
+ assert( results.is_a?(Hash), "Biomart::Dataset.search is not returning a hash. [required_attributes search]" )
226
+ assert( results[:data].is_a?(Array), "Biomart::Dataset.search[:data] is not returning an array. [required_attributes search]" )
227
+ results[:data].each do |data_row|
228
+ assert_equal( false, data_row[2].nil?, "The required_attributes search has not filtered out nil values." )
229
+ end
230
+
231
+ assert_raise( Biomart::ArgumentError ) { @ensembl.count( search_opts ) }
232
+ assert_raise Biomart::ArgumentError do
233
+ search_opts[:required_attributes] = "mouse_paralog_ensembl_gene"
234
+ @ensembl.search( search_opts )
235
+ end
236
+
237
+ results = @dcc.search(
238
+ :filters => {
239
+ "marker_symbol" => [
240
+ "Lrrc32", "Dub3", "Hs3st4", "Hs3st4", "Hs3st4", "Hs3st4",
241
+ "Hs3st4", "Hs3st4", "Hs3st4", "Tcrg-C", "Gm5195", "Gm5198",
242
+ "Gm5199", "Gm5625", "Rpl13-ps2", "Gm5664", "Gm5928", "Gm6035",
243
+ "Gm6049"
244
+ ]
245
+ },
246
+ :required_attributes => ["ikmc_project","ikmc_project_id"],
247
+ :process_results => true
248
+ )
249
+
250
+ results.each do |data_row|
251
+ assert_equal( false, data_row["ikmc_project"].nil?, "The required_attributes search has not filtered out nil values." )
252
+ assert_equal( false, data_row["ikmc_project_id"].nil?, "The required_attributes search has not filtered out nil values." )
253
+ end
254
+ end
127
255
  end
128
256
 
129
- def perform_count_queries( library )
130
- htgt_count = @htgt_targ.count()
131
- assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers. [using #{library} for HTTP communication]" )
132
- assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong! [using #{library} for HTTP communication]" )
133
-
134
- htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
135
- assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers. [using #{library} for HTTP communication]" )
136
- assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
137
-
138
- htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
139
- assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers. [using #{library} for HTTP communication]" )
140
- assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
257
+ context "A Biomart::Attribute instance" do
258
+ setup do
259
+ @kermits = @htgt.datasets["kermits"]
260
+ end
261
+
262
+ should "have basic metadata" do
263
+ true_false = [true,false]
264
+ ens_gene_id = @kermits.attributes["ensembl_gene_id"]
265
+
266
+ assert( !ens_gene_id.name.nil?, "Biomart::Attribute.name is nil." )
267
+ assert( !ens_gene_id.display_name.nil?, "Biomart::Attribute.display_name is nil." )
268
+
269
+ assert( true_false.include?( ens_gene_id.hidden? ), "Biomart::Attribute.hidden? is not returning true/false." )
270
+ assert( true_false.include?( ens_gene_id.default? ), "Biomart::Attribute.default? is not returning true/false." )
271
+ end
141
272
  end
142
273
 
143
- def perform_search_queries( library )
144
- search = @htgt_trap.search()
145
- assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash. [using #{library} for HTTP communication]" )
146
- assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array. [using #{library} for HTTP communication]" )
147
-
148
- search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
149
- assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
150
- assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
151
- assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
152
-
153
- search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
154
- assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
155
- assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
156
- assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
274
+ context "A Biomart::Filter instance" do
275
+ setup do
276
+ @kermits = @htgt.datasets["kermits"]
277
+ end
278
+
279
+ should "have basic metadata" do
280
+ true_false = [true,false]
281
+ ens_gene_id = @kermits.filters["ensembl_gene_id"]
282
+
283
+ assert( !ens_gene_id.name.nil?, "Biomart::Filter.name is nil." )
284
+ assert( !ens_gene_id.display_name.nil?, "Biomart::Filter.display_name is nil." )
285
+
286
+ assert( true_false.include?( ens_gene_id.hidden? ), "Biomart::Filter.hidden? is not returning true/false." )
287
+ assert( true_false.include?( ens_gene_id.default? ), "Biomart::Filter.default? is not returning true/false." )
288
+ assert( true_false.include?( ens_gene_id.multiple_values? ), "Biomart::Filter.multiple_values? is not returning true/false." )
289
+ end
157
290
  end
158
291
 
159
292
  context "The Biomart module" do
@@ -161,7 +294,7 @@ class BiomartTest < Test::Unit::TestCase
161
294
  @not_biomart = Biomart::Server.new( "http://www.sanger.ac.uk" )
162
295
  @htgt_targ = @htgt.datasets["htgt_targ"]
163
296
  @bad_dataset = Biomart::Dataset.new( "http://www.sanger.ac.uk/htgt/biomart", { :name => "wibble" } )
164
- @good_biomart = Biomart::Server.new( "http://www.sanger.ac.uk/htgt/biomart" )
297
+ @good_biomart = Biomart::Server.new( "http://www.i-dcc.org/biomart" )
165
298
  end
166
299
 
167
300
  should "allow you to ping a server" do
@@ -171,33 +304,13 @@ class BiomartTest < Test::Unit::TestCase
171
304
  end
172
305
 
173
306
  should "handle user/configuration errors (i.e. incorrect URLs etc)" do
174
- begin
175
- @not_biomart.list_databases
176
- rescue Biomart::HTTPError => e
177
- http_error = e
178
- end
179
-
180
- assert( http_error.is_a?( Biomart::HTTPError ), "Biomart.request is not processing HTTP errors correctly." )
307
+ assert_raise( Biomart::HTTPError ) { @not_biomart.list_databases }
181
308
  end
182
309
 
183
310
  should "handle biomart server errors gracefully" do
184
- begin
185
- @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } )
186
- rescue Biomart::FilterError => e
187
- filter_error = e
188
- end
189
-
190
- begin
191
- @htgt_targ.search( :attributes => ["wibbleblibbleblip"] )
192
- rescue Biomart::AttributeError => e
193
- attribute_error = e
194
- end
195
-
196
- begin
197
- @bad_dataset.count()
198
- rescue Biomart::DatasetError => e
199
- dataset_error = e
200
- end
311
+ assert_raise( Biomart::FilterError ) { @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } ) }
312
+ assert_raise( Biomart::AttributeError ) { @htgt_targ.search( :attributes => ["wibbleblibbleblip"] ) }
313
+ assert_raise( Biomart::DatasetError ) { @bad_dataset.count() }
201
314
 
202
315
  begin
203
316
  @bad_dataset.count()
@@ -205,10 +318,7 @@ class BiomartTest < Test::Unit::TestCase
205
318
  general_error = e
206
319
  end
207
320
 
208
- assert( filter_error.is_a?( Biomart::FilterError ), "Biomart.request is not handling Biomart filter errors correctly." )
209
- assert( attribute_error.is_a?( Biomart::AttributeError ), "Biomart.request is not handling Biomart attribute errors correctly." )
210
- assert( dataset_error.is_a?( Biomart::DatasetError ), "Biomart.request is not handling Biomart dataset errors correctly." )
211
- assert( general_error.is_a?( Biomart::BiomartError ), "Biomart.request is not handling general Biomart errors correctly." )
321
+ assert( general_error.is_a?(Biomart::BiomartError), "Biomart.request is not handling general Biomart errors correctly." )
212
322
  end
213
323
  end
214
324
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biomart
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ hash: 23
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
5
11
  platform: ruby
6
12
  authors:
7
13
  - Darren Oakley
@@ -9,40 +15,55 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-02-04 00:00:00 +00:00
18
+ date: 2010-06-10 00:00:00 +01:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: builder
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
23
32
  version: "0"
24
- version:
33
+ type: :runtime
34
+ version_requirements: *id001
25
35
  - !ruby/object:Gem::Dependency
26
- name: thoughtbot-shoulda
27
- type: :development
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
36
+ name: shoulda
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
30
40
  requirements:
31
41
  - - ">="
32
42
  - !ruby/object:Gem::Version
33
- version: "0"
34
- version:
43
+ hash: 23
44
+ segments:
45
+ - 2
46
+ - 10
47
+ version: "2.10"
48
+ type: :development
49
+ version_requirements: *id002
35
50
  - !ruby/object:Gem::Dependency
36
51
  name: hoe
37
- type: :development
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
40
55
  requirements:
41
56
  - - ">="
42
57
  - !ruby/object:Gem::Version
43
- version: 2.3.3
44
- version:
45
- description: A ruby API for interacting with Biomart services.
58
+ hash: 21
59
+ segments:
60
+ - 2
61
+ - 6
62
+ - 1
63
+ version: 2.6.1
64
+ type: :development
65
+ version_requirements: *id003
66
+ description: A ruby API for interacting with Biomart XML based webservices.
46
67
  email:
47
68
  - daz.oakley@gmail.com
48
69
  executables: []
@@ -68,8 +89,8 @@ files:
68
89
  - script/console
69
90
  - script/destroy
70
91
  - script/generate
71
- - tasks/metrics.task
72
- - tasks/shoulda.task
92
+ - tasks/metrics.rake
93
+ - tasks/shoulda.rake
73
94
  - test/test_biomart.rb
74
95
  - test/test_helper.rb
75
96
  has_rdoc: true
@@ -83,21 +104,27 @@ rdoc_options:
83
104
  require_paths:
84
105
  - lib
85
106
  required_ruby_version: !ruby/object:Gem::Requirement
107
+ none: false
86
108
  requirements:
87
109
  - - ">="
88
110
  - !ruby/object:Gem::Version
111
+ hash: 3
112
+ segments:
113
+ - 0
89
114
  version: "0"
90
- version:
91
115
  required_rubygems_version: !ruby/object:Gem::Requirement
116
+ none: false
92
117
  requirements:
93
118
  - - ">="
94
119
  - !ruby/object:Gem::Version
120
+ hash: 3
121
+ segments:
122
+ - 0
95
123
  version: "0"
96
- version:
97
124
  requirements: []
98
125
 
99
126
  rubyforge_project: biomart
100
- rubygems_version: 1.3.5
127
+ rubygems_version: 1.3.7
101
128
  signing_key:
102
129
  specification_version: 3
103
130
  summary: A ruby API for interacting with Biomart services.