biomart 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,10 @@
1
+ === 0.2.0 2010-06-10
2
+
3
+ * 2 major enhancements:
4
+ * Added the ability to perform federated queries across two datasets.
5
+ * Added the option to discard rows of data if a specified list of
6
+ fields are not present.
7
+
1
8
  === 0.1.5 2010-02-04
2
9
 
3
10
  * 1 major bugfix:
@@ -12,7 +12,7 @@ lib/biomart/server.rb
12
12
  script/console
13
13
  script/destroy
14
14
  script/generate
15
- tasks/metrics.task
16
- tasks/shoulda.task
15
+ tasks/metrics.rake
16
+ tasks/shoulda.rake
17
17
  test/test_biomart.rb
18
18
  test/test_helper.rb
@@ -79,7 +79,57 @@ just get on with things...
79
79
  # etc. etc.
80
80
 
81
81
  See Biomart module and Class docs for more detail.
82
+
83
+ == Federated Searches
84
+
85
+ To perform a federated search across two datasets...
86
+
87
+ htgt = Biomart::Server.new( "http://www.sanger.ac.uk/htgt/biomart" )
88
+
89
+ res = htgt.datasets["htgt_targ"].search(
90
+ :filters => {
91
+ "status" => [
92
+ "Mice - Genotype confirmed",
93
+ "Mice - Germline transmission",
94
+ "Mice - Microinjection in progress",
95
+ "ES Cells - Targeting Confirmed"
96
+ ]
97
+ },
98
+ :attributes => [
99
+ "marker_symbol",
100
+ "mgi_accession_id",
101
+ "status"
102
+ ],
103
+ :federate => [
104
+ {
105
+ :dataset => htgt.datasets["mmusculus_gene_ensembl"],
106
+ :filters => {
107
+ "chromosome_name" => "1",
108
+ "start" => "1",
109
+ "end" => "10000000"
110
+ },
111
+ :attributes => []
112
+ }
113
+ ]
114
+ )
82
115
 
116
+ The above will perform a federated query for all genes with available
117
+ mice knockout es cells in the first 10Mb of chromosome 1 from the IKMC
118
+ projects (not really important, but it's an example of a complex query).
119
+
120
+ The basic search arguments are the same as if we were searching across
121
+ a single dataset, with the addition of the :federate option, which is
122
+ an array of hashes for each dataset (and additional filters/attributes)
123
+ that we want to federate our search with.
124
+
125
+ Note: at present you can only federate across two datasets; this is
126
+ a limitation in the current stable release of biomart (0.7). If you try to
127
+ federate across more than two datasets, a Biomart::ArgumentError will
128
+ be raised. This limitation shall be removed from this API when it is
129
+ possible to federate across more than two datasets in biomart itself.
130
+
131
+ Count queries are only allowed on single datasets.
132
+
83
133
  == Using a Proxy
84
134
 
85
135
  If you need to channel all of your requests via a proxy, specify your
data/Rakefile CHANGED
@@ -1,5 +1,8 @@
1
1
  require "rubygems"
2
+
3
+ gem "flog", "= 2.2.0"
2
4
  gem "hoe", ">= 2.1.0"
5
+
3
6
  require "hoe"
4
7
  require "fileutils"
5
8
  require "./lib/biomart"
@@ -15,14 +18,14 @@ $hoe = Hoe.spec "biomart" do
15
18
  self.rubyforge_name = self.name
16
19
  self.url = "http://github.com/dazoakley/biomart"
17
20
  self.summary = "A ruby API for interacting with Biomart services."
18
- self.description = "A ruby API for interacting with Biomart services."
21
+ self.description = "A ruby API for interacting with Biomart XML based webservices."
19
22
  self.extra_deps = [["builder",">= 0"]]
20
- self.extra_dev_deps = [["thoughtbot-shoulda",">=0"]]
23
+ self.extra_dev_deps = [["shoulda",">= 2.10"]]
21
24
  self.extra_rdoc_files = ["README.rdoc"]
22
25
  end
23
26
 
24
27
  require "newgem/tasks"
25
- Dir["tasks/*.task"].each { |t| load t }
28
+ Dir["tasks/*.rake"].each { |t| load t }
26
29
 
27
30
  # TODO - want other tests/tasks run by default? Add them to the list
28
31
  # remove_task :default
@@ -2,12 +2,12 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{biomart}
5
- s.version = "0.1.5"
5
+ s.version = "0.2.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Darren Oakley"]
9
- s.date = %q{2010-02-04}
10
- s.description = %q{A ruby API for interacting with Biomart services.}
9
+ s.date = %q{2010-06-10}
10
+ s.description = %q{A ruby API for interacting with Biomart XML based webservices.}
11
11
  s.email = ["daz.oakley@gmail.com"]
12
12
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
13
13
  s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/metrics.task", "tasks/shoulda.task", "test/test_biomart.rb", "test/test_helper.rb"]
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
15
15
  s.rdoc_options = ["--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
17
17
  s.rubyforge_project = %q{biomart}
18
- s.rubygems_version = %q{1.3.5}
18
+ s.rubygems_version = %q{1.3.7}
19
19
  s.summary = %q{A ruby API for interacting with Biomart services.}
20
20
  s.test_files = ["test/test_biomart.rb", "test/test_helper.rb"]
21
21
 
@@ -23,14 +23,18 @@ Gem::Specification.new do |s|
23
23
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
24
  s.specification_version = 3
25
25
 
26
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
26
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
27
27
  s.add_runtime_dependency(%q<builder>, [">= 0"])
28
- s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
29
- s.add_development_dependency(%q<hoe>, [">= 2.3.3"])
28
+ s.add_development_dependency(%q<shoulda>, [">= 2.10"])
29
+ s.add_development_dependency(%q<hoe>, [">= 2.6.1"])
30
30
  else
31
31
  s.add_dependency(%q<builder>, [">= 0"])
32
+ s.add_dependency(%q<shoulda>, [">= 2.10"])
33
+ s.add_dependency(%q<hoe>, [">= 2.6.1"])
32
34
  end
33
35
  else
34
36
  s.add_dependency(%q<builder>, [">= 0"])
37
+ s.add_dependency(%q<shoulda>, [">= 2.10"])
38
+ s.add_dependency(%q<hoe>, [">= 2.6.1"])
35
39
  end
36
40
  end
@@ -1,21 +1,14 @@
1
1
  require "uri"
2
2
  require "net/http"
3
+ require "cgi"
3
4
  require "rexml/document"
4
5
  require "csv"
5
6
 
6
7
  require "rubygems"
7
8
  require "builder"
8
9
 
9
- #begin
10
- # require "curb"
11
- # use_curb = true
12
- #rescue LoadError
13
- # use_curb = false
14
- #end
15
- #CURB_AVAILABLE = use_curb
16
-
17
10
  module Biomart
18
- VERSION = "0.1.5"
11
+ VERSION = "0.2.0"
19
12
 
20
13
  # This is the base Biomart error/exception class. Rescue it if
21
14
  # you want to catch any exceptions that this code might raise.
@@ -46,76 +39,49 @@ module Biomart
46
39
  # dataset.
47
40
  class DatasetError < BiomartError; end
48
41
 
42
+ # Error class representing errors in the arguments being passed
43
+ # to the api.
44
+ class ArgumentError < BiomartError; end
45
+
49
46
  # Centralised request function for handling all of the HTTP requests
50
47
  # to the biomart servers.
51
48
  def request( params={} )
52
- net_http_request(params)
49
+ if params[:url] =~ / /
50
+ params[:url].gsub!(" ","+")
51
+ end
52
+
53
+ uri = URI.parse( params[:url] )
54
+ client = net_http_client()
55
+ req = nil
56
+ response = nil
57
+
58
+ case params[:method]
59
+ when 'post'
60
+ req = Net::HTTP::Post.new(uri.path)
61
+ req.form_data = { "query" => params[:query] }
62
+ else
63
+ req = Net::HTTP::Get.new(uri.request_uri)
64
+ end
65
+
66
+ client.start(uri.host, uri.port) do |http|
67
+ if Biomart.timeout or params[:timeout]
68
+ http.read_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
69
+ http.open_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
70
+ end
71
+ response = http.request(req)
72
+ end
53
73
 
54
- #if CURB_AVAILABLE and ( Biomart.use_net_http != true )
55
- # curb_request(params)
56
- #else
57
- # net_http_request(params)
58
- #end
74
+ check_response( response.body, response.code )
75
+
76
+ return response.body
59
77
  end
60
78
 
61
79
  class << self
62
- attr_accessor :proxy, :timeout, :use_net_http
80
+ attr_accessor :proxy, :timeout
63
81
  end
64
82
 
65
83
  private
66
84
 
67
- # Utility function to perform the request method using the curb
68
- # gem (a wrapper around libcurl) - supposed to be faster than
69
- # Net::HTTP.
70
- def curb_request( params={} )
71
- client = Curl::Easy.new( params[:url] )
72
-
73
- if Biomart.timeout or params[:timeout]
74
- client.connect_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
75
- end
76
-
77
- if proxy_url() then client.proxy_url = proxy_url() end
78
-
79
- case params[:method]
80
- when 'post'
81
- client.http_post( Curl::PostField.content( "query", params[:query], "text/xml" ) )
82
- else
83
- client.http_get
84
- end
85
-
86
- check_response( client.body_str, client.response_code )
87
-
88
- return client.body_str
89
- end
90
-
91
- # Utility function to perform the request method using Net::HTTP.
92
- def net_http_request( params={} )
93
- uri = URI.parse( params[:url] )
94
- client = net_http_client()
95
- req = nil
96
- response = nil
97
-
98
- case params[:method]
99
- when 'post'
100
- req = Net::HTTP::Post.new(uri.path)
101
- req.form_data = { "query" => params[:query] }
102
- else
103
- req = Net::HTTP::Get.new(uri.request_uri)
104
- end
105
-
106
- client.start(uri.host, uri.port) do |http|
107
- if Biomart.timeout or params[:timeout]
108
- http.read_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
109
- http.open_timeout = params[:timeout] ? params[:timeout] : Biomart.timeout
110
- end
111
- response = http.request(req)
112
- end
113
-
114
- check_response( response.body, response.code )
115
-
116
- return response.body
117
- end
118
-
119
85
  # Utility function to create a Net::HTTP object.
120
86
  def net_http_client
121
87
  client = Net::HTTP
@@ -2,13 +2,26 @@ module Biomart
2
2
  # Class representation for a biomart attribute.
3
3
  # Will belong to a Biomart::Dataset.
4
4
  class Attribute
5
- attr_reader :name, :display_name, :default
5
+ attr_reader :name, :display_name
6
6
 
7
7
  def initialize(args)
8
8
  @name = args["internalName"]
9
9
  @display_name = args["displayName"]
10
10
  @default = args["default"] ? true : false
11
+ @hidden = args["hideDisplay"] ? true : false
11
12
  end
12
13
 
14
+ # Convenience method to see if this attribute is hidden from
15
+ # the standard MartView interface. Returns true/false.
16
+ def hidden?
17
+ @hidden
18
+ end
19
+
20
+ # Convenience method to see if this attribute would be
21
+ # enabled by default in the standard MartView interface.
22
+ # Returns true/false.
23
+ def default?
24
+ @default
25
+ end
13
26
  end
14
27
  end
@@ -36,6 +36,12 @@ module Biomart
36
36
  return @datasets
37
37
  end
38
38
 
39
+ # Returns true / false if this database is visible in the
40
+ # default MartView interface.
41
+ def visible?
42
+ @visible
43
+ end
44
+
39
45
  private
40
46
 
41
47
  # Utility method to do the webservice call to the biomart server
@@ -62,10 +62,20 @@ module Biomart
62
62
  # the result of the count query.
63
63
  #
64
64
  # optional arguments:
65
- #
66
- # :filters:: hash of key-value pairs (filter => search term)
67
- # :timeout:: set a timeout length for the request (secs)
65
+ #
66
+ # {
67
+ # :timeout => integer, # set a timeout length for the request (secs)
68
+ # :filters => {} # hash of key-value pairs (filter => search term)
69
+ # }
68
70
  def count( args={} )
71
+ if args[:federate]
72
+ raise Biomart::ArgumentError, "You cannot federate a count query."
73
+ end
74
+
75
+ if args[:required_attributes]
76
+ raise Biomart::ArgumentError, "The :required_attributes option is not allowed on count queries."
77
+ end
78
+
69
79
  result = request(
70
80
  :method => 'post',
71
81
  :url => @url,
@@ -82,30 +92,51 @@ module Biomart
82
92
  # Function to perform a Biomart search.
83
93
  #
84
94
  # optional arguments:
95
+ #
96
+ # {
97
+ # :process_results => true/false, # convert search results to object
98
+ # :timeout => integer, # set a timeout length for the request (secs)
99
+ # :filters => {}, # hash of key-value pairs (filter => search term)
100
+ # :attributes => [], # array of attributes to retrieve
101
+ # :required_attributes => [], # array of attributes that are required
102
+ # :federate => [
103
+ # {
104
+ # :dataset => Biomart::Dataset, # A dataset object to federate with
105
+ # :filters => {}, # hash of key-value pairs (filter => search term)
106
+ # :attributes => [] # array of attributes to retrieve
107
+ # }
108
+ # ]
109
+ # }
110
+ #
111
+ # Note, if you do not pass any filters or attributes arguments, the defaults
112
+ # for the dataset shall be used.
85
113
  #
86
- # :filters:: hash of key-value pairs (filter => search term)
87
- # :attributes:: array of attributes to retrieve
88
- # :process_results:: true/false - convert search results to object
89
- # :timeout:: set a timeout length for the request (secs)
114
+ # Also, using the :required_attributes option - this performs AND logic and will require
115
+ # data to be returned in all of the listed attributes in order for it to be returned.
90
116
  #
91
117
  # By default will return a hash with the following:
92
118
  #
93
- # :headers:: array of headers
94
- # :data:: array of arrays containing search results
119
+ # {
120
+ # :headers => [], # array of headers
121
+ # :data => [] # array of arrays containing search results
122
+ # }
95
123
  #
96
124
  # But with the :process_results option will return an array of hashes,
97
125
  # where each hash represents a row of results (keyed by the attribute name).
98
126
  def search( args={} )
127
+ if args[:required_attributes] and !args[:required_attributes].is_a?(Array)
128
+ raise Biomart::ArgumentError, "The :required_attributes option must be passed as an array."
129
+ end
130
+
99
131
  response = request(
100
132
  :method => 'post',
101
133
  :url => @url,
102
134
  :timeout => args[:timeout],
103
- :query => generate_xml(
104
- :filters => args[:filters],
105
- :attributes => args[:attributes]
106
- )
135
+ :query => generate_xml( process_xml_args(args) )
107
136
  )
137
+
108
138
  result = process_tsv( args, response )
139
+ result = filter_data_rows( args, result ) if args[:required_attributes]
109
140
  result = conv_results_to_a_of_h( result ) if args[:process_results]
110
141
  return result
111
142
  end
@@ -118,38 +149,22 @@ module Biomart
118
149
  xml.instruct!
119
150
  xml.declare!( :DOCTYPE, :Query )
120
151
  xml.Query( :virtualSchemaName => "default", :formatter => "TSV", :header => "0", :uniqueRows => "1", :count => args[:count], :datasetConfigVersion => "0.6" ) {
121
- xml.Dataset( :name => @name, :interface => "default" ) {
122
-
123
- if args[:filters]
124
- args[:filters].each do |name,value|
125
- if value.is_a? Array
126
- value = value.join(",")
127
- end
128
- xml.Filter( :name => name, :value => value )
129
- end
130
- else
131
- self.filters.each do |name,filter|
132
- if filter.default
133
- xml.Filter( :name => name, :value => filter.default_value )
134
- end
135
- end
136
- end
137
-
138
- unless args[:count]
139
- if args[:attributes]
140
- args[:attributes].each do |name|
141
- xml.Attribute( :name => name )
142
- end
143
- else
144
- self.attributes.each do |name,attribute|
145
- if attribute.default
146
- xml.Attribute( :name => name )
147
- end
148
- end
152
+ dataset_xml( xml, self, { :filters => args[:filters], :attributes => args[:attributes] } )
153
+
154
+ if args[:federate]
155
+ args[:federate].each do |joined_dataset|
156
+ unless joined_dataset[:dataset].is_a?(Biomart::Dataset)
157
+ raise Biomart::ArgumentError, "You must pass a Biomart::Dataset object to the :federate[:dataset] option."
149
158
  end
159
+
160
+ dataset_xml(
161
+ xml,
162
+ joined_dataset[:dataset],
163
+ { :filters => joined_dataset[:filters], :attributes => joined_dataset[:attributes] }
164
+ )
150
165
  end
151
-
152
- }
166
+ end
167
+
153
168
  }
154
169
 
155
170
  return biomart_xml
@@ -190,21 +205,77 @@ module Biomart
190
205
  end
191
206
  end
192
207
 
208
+ # Utility function to process and test the arguments passed for
209
+ # the xml query.
210
+ def process_xml_args( args={} )
211
+ xml_args = {
212
+ :filters => args[:filters],
213
+ :attributes => args[:attributes]
214
+ }
215
+
216
+ if args[:federate]
217
+ unless args[:federate].is_a?(Array)
218
+ raise Biomart::ArgumentError, "The :federate option must be passed as an array."
219
+ end
220
+
221
+ unless args[:federate].size == 1
222
+ raise Biomart::ArgumentError, "Sorry, we can only federate two datasets at present. This limitation shall be lifted in version 0.8 of biomart."
223
+ end
224
+
225
+ xml_args[:federate] = args[:federate]
226
+ end
227
+
228
+ return xml_args
229
+ end
230
+
231
+ # Helper function to produce the portion of the biomart xml for
232
+ # a dataset query.
233
+ def dataset_xml( xml, dataset, args )
234
+ xml.Dataset( :name => dataset.name, :interface => "default" ) {
235
+
236
+ if args[:filters]
237
+ args[:filters].each do |name,value|
238
+ if value.is_a? Array
239
+ value = value.join(",")
240
+ end
241
+ xml.Filter( :name => name, :value => value )
242
+ end
243
+ else
244
+ dataset.filters.each do |name,filter|
245
+ if filter.default?
246
+ xml.Filter( :name => name, :value => filter.default_value )
247
+ end
248
+ end
249
+ end
250
+
251
+ unless args[:count]
252
+ if args[:attributes]
253
+ args[:attributes].each do |name|
254
+ xml.Attribute( :name => name )
255
+ end
256
+ else
257
+ dataset.attributes.each do |name,attribute|
258
+ if attribute.default?
259
+ xml.Attribute( :name => name )
260
+ end
261
+ end
262
+ end
263
+ end
264
+
265
+ }
266
+ end
267
+
193
268
  # Utility function to transform the tab-separated data retrieved
194
269
  # from the Biomart search query into a ruby object.
195
270
  def process_tsv( args, tsv )
196
271
  headers = []
197
272
  parsed_data = []
273
+
274
+ append_header_attributes_for_tsv( headers, self, args[:attributes] )
198
275
 
199
- if args[:attributes]
200
- args[:attributes].each do |attribute|
201
- headers.push(attribute)
202
- end
203
- else
204
- self.attributes.each do |name,attribute|
205
- if attribute.default
206
- headers.push(name)
207
- end
276
+ if args[:federate]
277
+ args[:federate].each do |joined_dataset|
278
+ append_header_attributes_for_tsv( headers, joined_dataset[:dataset], joined_dataset[:attributes] )
208
279
  end
209
280
  end
210
281
 
@@ -231,6 +302,22 @@ module Biomart
231
302
  }
232
303
  end
233
304
 
305
+ # Helper function to append the attribute names to the 'headers' array
306
+ # for processing the returned results.
307
+ def append_header_attributes_for_tsv( headers, dataset, attributes )
308
+ if attributes
309
+ attributes.each do |attribute|
310
+ headers.push(attribute)
311
+ end
312
+ else
313
+ dataset.attributes.each do |name,attribute|
314
+ if attribute.default?
315
+ headers.push(name)
316
+ end
317
+ end
318
+ end
319
+ end
320
+
234
321
  # Utility function to process TSV formatted data that raises errors. (Biomart
235
322
  # has a habit of serving out this...) First attempts to use the CSV modules
236
323
  # 'parse_line' function to read in the data, if that fails, tries to use split
@@ -301,6 +388,46 @@ module Biomart
301
388
 
302
389
  return result_objects
303
390
  end
304
-
391
+
392
+ # Utility function to remove data rows from a search result that do not include
393
+ # the :required_attributes.
394
+ def filter_data_rows( args, result )
395
+ # Get the list of attributes searched for...
396
+ attributes = args[:attributes] ? args[:attributes] : []
397
+ if attributes.empty?
398
+ self.attributes.each do |name,attribute|
399
+ if attribute.default?
400
+ attributes.push(name)
401
+ end
402
+ end
403
+ end
404
+
405
+ # Work out which attribute positions we need to test...
406
+ positions_to_test = []
407
+ attributes.each_index do |index|
408
+ if args[:required_attributes].include?(attributes[index])
409
+ positions_to_test.push(index)
410
+ end
411
+ end
412
+
413
+ # Now go through the results and filter out the unwanted data...
414
+ filtered_data = []
415
+ result[:data].each do |data_row|
416
+ save_row_count = 0
417
+
418
+ positions_to_test.each do |position|
419
+ save_row_count = save_row_count + 1 unless data_row[position].nil?
420
+ end
421
+
422
+ if save_row_count == positions_to_test.size
423
+ filtered_data.push(data_row)
424
+ end
425
+ end
426
+
427
+ return {
428
+ :headers => result[:headers],
429
+ :data => filtered_data
430
+ }
431
+ end
305
432
  end
306
433
  end
@@ -2,14 +2,36 @@ module Biomart
2
2
  # Class representation for a biomart filter.
3
3
  # Will belong to a Biomart::Dataset.
4
4
  class Filter
5
- attr_reader :name, :display_name, :default, :default_value
5
+ attr_reader :name, :display_name, :default_value, :qualifier, :type
6
6
 
7
7
  def initialize(args)
8
- @name = args["internalName"]
9
- @display_name = args["displayName"]
10
- @default = args["defaultOn"] ? true : false
11
- @default_value = args["defaultValue"]
8
+ @name = args["internalName"]
9
+ @display_name = args["displayName"]
10
+ @default = args["defaultOn"] ? true : false
11
+ @default_value = args["defaultValue"]
12
+ @hidden = args["hideDisplay"] ? true : false
13
+ @qualifier = args["qualifier"]
14
+ @type = args["type"]
15
+ @multiple_values = args["multipleValues"] ? true : false
12
16
  end
13
17
 
18
+ # Convenience method to see if this filter is hidden from
19
+ # the standard MartView interface. Returns true/false.
20
+ def hidden?
21
+ @hidden
22
+ end
23
+
24
+ # Convenience method to see if this filter would be
25
+ # enabled by default in the standard MartView interface.
26
+ # Returns true/false.
27
+ def default?
28
+ @default
29
+ end
30
+
31
+ # Convenience method to see if this filter allows multiple
32
+ # values to be passed to it.
33
+ def multiple_values?
34
+ @multiple_values
35
+ end
14
36
  end
15
37
  end
@@ -73,9 +73,7 @@ module Biomart
73
73
  url = @url + '?type=registry'
74
74
  document = REXML::Document.new( request( :url => url ) )
75
75
  REXML::XPath.each( document, "//MartURLLocation" ) do |d|
76
- if d.attributes["visible"] === "1"
77
- @databases[ d.attributes["name"] ] = Database.new( @url, d.attributes )
78
- end
76
+ @databases[ d.attributes["name"] ] = Database.new( @url, d.attributes )
79
77
  end
80
78
  end
81
79
 
File without changes
File without changes
@@ -26,9 +26,11 @@ class BiomartTest < Test::Unit::TestCase
26
26
  end
27
27
 
28
28
  should "have basic metadata" do
29
+ true_false = [true,false]
29
30
  assert( @htgt_database.display_name, "Biomart::Database does not have a 'display name'." )
30
31
  assert( @htgt_database.name, "Biomart::Database does not have a 'name'." )
31
32
  assert( @htgt_database.visible != nil, "Biomart::Database does not have a 'visible' flag." )
33
+ assert( true_false.include?( @htgt_database.visible? ), "Biomart::Database.visible? is not returning true/false." )
32
34
  end
33
35
 
34
36
  should "have datasets" do
@@ -43,7 +45,9 @@ class BiomartTest < Test::Unit::TestCase
43
45
  @htgt_targ = @htgt.datasets["htgt_targ"]
44
46
  @htgt_trap = @htgt.datasets["htgt_trap"]
45
47
  @kermits = @htgt.datasets["kermits"]
48
+ @ensembl = @htgt.datasets["mmusculus_gene_ensembl"]
46
49
  @emma = Biomart::Dataset.new( "http://www.emmanet.org/biomart", { :name => "strains" } )
50
+ @dcc = Biomart::Dataset.new( "http://www.i-dcc.org/biomart", { :name => "dcc" } )
47
51
  end
48
52
 
49
53
  should "have basic metadata" do
@@ -64,20 +68,34 @@ class BiomartTest < Test::Unit::TestCase
64
68
  assert( @kermits.attributes["ensembl_gene_id"].is_a?( Biomart::Attribute ), "Biomart::Dataset is not creating Biomart::Attribute objects." )
65
69
  end
66
70
 
67
- should "perform count/search queries" do
68
- perform_count_queries("net/http")
69
- perform_search_queries("net/http")
70
-
71
- #if CURB_AVAILABLE
72
- # perform_count_queries("curb")
73
- # perform_search_queries("curb")
74
- # Biomart.use_net_http = true
75
- # perform_count_queries("net/http")
76
- # perform_search_queries("net/http")
77
- #else
78
- # perform_count_queries("net/http")
79
- # perform_search_queries("net/http")
80
- #end
71
+ should "perform count queries" do
72
+ htgt_count = @htgt_targ.count()
73
+ assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers." )
74
+ assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong!" )
75
+
76
+ htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
77
+ assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers." )
78
+ assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!" )
79
+
80
+ htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
81
+ assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers." )
82
+ assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!" )
83
+ end
84
+
85
+ should "perform search queries" do
86
+ search = @htgt_trap.search()
87
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
88
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
89
+
90
+ search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
91
+ assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array." )
92
+ assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes." )
93
+ assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info." )
94
+
95
+ search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
96
+ assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array." )
97
+ assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
98
+ assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
81
99
  end
82
100
 
83
101
  should "perform search queries whilst altering the timeout property" do
@@ -124,36 +142,151 @@ class BiomartTest < Test::Unit::TestCase
124
142
  assert( search2[:data].size > 0, "Biomart::Dataset.search[:data] for poorly formatted TSV data is empty. (EMMA Query)" )
125
143
  end
126
144
 
145
+ should "perform federated search queries" do
146
+ search_opts = {
147
+ :filters => {
148
+ "status" => [
149
+ "Mice - Genotype confirmed", "Mice - Germline transmission",
150
+ "Mice - Microinjection in progress", "ES Cells - Targeting Confirmed"
151
+ ]
152
+ },
153
+ :attributes => [ "marker_symbol", "mgi_accession_id", "status" ],
154
+ :federate => [
155
+ {
156
+ :dataset => @ensembl,
157
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
158
+ :attributes => []
159
+ }
160
+ ]
161
+ }
162
+
163
+ results = @htgt_targ.search( search_opts )
164
+
165
+ assert( results.is_a?(Hash), "Biomart::Dataset.search is not returning a hash. [federated search]" )
166
+ assert( results[:data].is_a?(Array), "Biomart::Dataset.search[:data] is not returning an array. [federated search]" )
167
+ assert( results[:data][0].size === 3, "Biomart::Dataset.search[:data] is not returning 3 attributes. [federated search]" )
168
+ assert( results[:headers].size === 3, "Biomart::Dataset.search[:headers] is not returning 3 elements. [federated search]" )
169
+
170
+ assert_raise( Biomart::ArgumentError ) { @htgt_targ.count( search_opts ) }
171
+
172
+ assert_raise Biomart::ArgumentError do
173
+ search_opts[:federate] = [
174
+ {
175
+ :dataset => "mmusculus_gene_ensembl",
176
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
177
+ :attributes => []
178
+ }
179
+ ]
180
+ results = @htgt_targ.search( search_opts )
181
+ end
182
+
183
+ assert_raise Biomart::ArgumentError do
184
+ search_opts[:federate] = {
185
+ :dataset => "mmusculus_gene_ensembl",
186
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
187
+ :attributes => []
188
+ }
189
+ results = @htgt_targ.search( search_opts )
190
+ end
191
+
192
+ assert_raise Biomart::ArgumentError do
193
+ search_opts[:federate] = [
194
+ {
195
+ :dataset => @ensembl,
196
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
197
+ :attributes => []
198
+ },
199
+ {
200
+ :dataset => @ensembl,
201
+ :filters => { "chromosome_name" => "1", "start" => "1", "end" => "10000000" },
202
+ :attributes => []
203
+ }
204
+ ]
205
+ results = @htgt_targ.search( search_opts )
206
+ end
207
+ end
208
+
209
+ should "perform search queries with the :required_attributes option" do
210
+ search_opts = {
211
+ :filters => {
212
+ "chromosome_name" => "1",
213
+ "start" => "1",
214
+ "end" => "10000000"
215
+ },
216
+ :attributes => [
217
+ "ensembl_gene_id", "ensembl_transcript_id",
218
+ "mouse_paralog_ensembl_gene", "mouse_paralog_chromosome"
219
+ ],
220
+ :required_attributes => ["mouse_paralog_ensembl_gene"]
221
+ }
222
+
223
+ results = @ensembl.search( search_opts )
224
+
225
+ assert( results.is_a?(Hash), "Biomart::Dataset.search is not returning a hash. [required_attributes search]" )
226
+ assert( results[:data].is_a?(Array), "Biomart::Dataset.search[:data] is not returning an array. [required_attributes search]" )
227
+ results[:data].each do |data_row|
228
+ assert_equal( false, data_row[2].nil?, "The required_attributes search has not filtered out nil values." )
229
+ end
230
+
231
+ assert_raise( Biomart::ArgumentError ) { @ensembl.count( search_opts ) }
232
+ assert_raise Biomart::ArgumentError do
233
+ search_opts[:required_attributes] = "mouse_paralog_ensembl_gene"
234
+ @ensembl.search( search_opts )
235
+ end
236
+
237
+ results = @dcc.search(
238
+ :filters => {
239
+ "marker_symbol" => [
240
+ "Lrrc32", "Dub3", "Hs3st4", "Hs3st4", "Hs3st4", "Hs3st4",
241
+ "Hs3st4", "Hs3st4", "Hs3st4", "Tcrg-C", "Gm5195", "Gm5198",
242
+ "Gm5199", "Gm5625", "Rpl13-ps2", "Gm5664", "Gm5928", "Gm6035",
243
+ "Gm6049"
244
+ ]
245
+ },
246
+ :required_attributes => ["ikmc_project","ikmc_project_id"],
247
+ :process_results => true
248
+ )
249
+
250
+ results.each do |data_row|
251
+ assert_equal( false, data_row["ikmc_project"].nil?, "The required_attributes search has not filtered out nil values." )
252
+ assert_equal( false, data_row["ikmc_project_id"].nil?, "The required_attributes search has not filtered out nil values." )
253
+ end
254
+ end
127
255
  end
128
256
 
129
- def perform_count_queries( library )
130
- htgt_count = @htgt_targ.count()
131
- assert( htgt_count.is_a?( Integer ), "Biomart::Dataset.count is not returning integers. [using #{library} for HTTP communication]" )
132
- assert( htgt_count > 0, "Biomart::Dataset.count is returning zero - this is wrong! [using #{library} for HTTP communication]" )
133
-
134
- htgt_count_single_filter = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
135
- assert( htgt_count_single_filter.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers. [using #{library} for HTTP communication]" )
136
- assert( htgt_count_single_filter > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
137
-
138
- htgt_count_single_filter_group_value = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
139
- assert( htgt_count_single_filter_group_value.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers. [using #{library} for HTTP communication]" )
140
- assert( htgt_count_single_filter_group_value > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong! [using #{library} for HTTP communication]" )
257
+ context "A Biomart::Attribute instance" do
258
+ setup do
259
+ @kermits = @htgt.datasets["kermits"]
260
+ end
261
+
262
+ should "have basic metadata" do
263
+ true_false = [true,false]
264
+ ens_gene_id = @kermits.attributes["ensembl_gene_id"]
265
+
266
+ assert( !ens_gene_id.name.nil?, "Biomart::Attribute.name is nil." )
267
+ assert( !ens_gene_id.display_name.nil?, "Biomart::Attribute.display_name is nil." )
268
+
269
+ assert( true_false.include?( ens_gene_id.hidden? ), "Biomart::Attribute.hidden? is not returning true/false." )
270
+ assert( true_false.include?( ens_gene_id.default? ), "Biomart::Attribute.default? is not returning true/false." )
271
+ end
141
272
  end
142
273
 
143
- def perform_search_queries( library )
144
- search = @htgt_trap.search()
145
- assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash. [using #{library} for HTTP communication]" )
146
- assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array. [using #{library} for HTTP communication]" )
147
-
148
- search1 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
149
- assert( search1.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
150
- assert( search1.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
151
- assert( search1.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
152
-
153
- search2 = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
154
- assert( search2.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array. [using #{library} for HTTP communication]" )
155
- assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes. [using #{library} for HTTP communication]" )
156
- assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info. [using #{library} for HTTP communication]" )
274
+ context "A Biomart::Filter instance" do
275
+ setup do
276
+ @kermits = @htgt.datasets["kermits"]
277
+ end
278
+
279
+ should "have basic metadata" do
280
+ true_false = [true,false]
281
+ ens_gene_id = @kermits.filters["ensembl_gene_id"]
282
+
283
+ assert( !ens_gene_id.name.nil?, "Biomart::Filter.name is nil." )
284
+ assert( !ens_gene_id.display_name.nil?, "Biomart::Filter.display_name is nil." )
285
+
286
+ assert( true_false.include?( ens_gene_id.hidden? ), "Biomart::Filter.hidden? is not returning true/false." )
287
+ assert( true_false.include?( ens_gene_id.default? ), "Biomart::Filter.default? is not returning true/false." )
288
+ assert( true_false.include?( ens_gene_id.multiple_values? ), "Biomart::Filter.multiple_values? is not returning true/false." )
289
+ end
157
290
  end
158
291
 
159
292
  context "The Biomart module" do
@@ -161,7 +294,7 @@ class BiomartTest < Test::Unit::TestCase
161
294
  @not_biomart = Biomart::Server.new( "http://www.sanger.ac.uk" )
162
295
  @htgt_targ = @htgt.datasets["htgt_targ"]
163
296
  @bad_dataset = Biomart::Dataset.new( "http://www.sanger.ac.uk/htgt/biomart", { :name => "wibble" } )
164
- @good_biomart = Biomart::Server.new( "http://www.sanger.ac.uk/htgt/biomart" )
297
+ @good_biomart = Biomart::Server.new( "http://www.i-dcc.org/biomart" )
165
298
  end
166
299
 
167
300
  should "allow you to ping a server" do
@@ -171,33 +304,13 @@ class BiomartTest < Test::Unit::TestCase
171
304
  end
172
305
 
173
306
  should "handle user/configuration errors (i.e. incorrect URLs etc)" do
174
- begin
175
- @not_biomart.list_databases
176
- rescue Biomart::HTTPError => e
177
- http_error = e
178
- end
179
-
180
- assert( http_error.is_a?( Biomart::HTTPError ), "Biomart.request is not processing HTTP errors correctly." )
307
+ assert_raise( Biomart::HTTPError ) { @not_biomart.list_databases }
181
308
  end
182
309
 
183
310
  should "handle biomart server errors gracefully" do
184
- begin
185
- @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } )
186
- rescue Biomart::FilterError => e
187
- filter_error = e
188
- end
189
-
190
- begin
191
- @htgt_targ.search( :attributes => ["wibbleblibbleblip"] )
192
- rescue Biomart::AttributeError => e
193
- attribute_error = e
194
- end
195
-
196
- begin
197
- @bad_dataset.count()
198
- rescue Biomart::DatasetError => e
199
- dataset_error = e
200
- end
311
+ assert_raise( Biomart::FilterError ) { @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } ) }
312
+ assert_raise( Biomart::AttributeError ) { @htgt_targ.search( :attributes => ["wibbleblibbleblip"] ) }
313
+ assert_raise( Biomart::DatasetError ) { @bad_dataset.count() }
201
314
 
202
315
  begin
203
316
  @bad_dataset.count()
@@ -205,10 +318,7 @@ class BiomartTest < Test::Unit::TestCase
205
318
  general_error = e
206
319
  end
207
320
 
208
- assert( filter_error.is_a?( Biomart::FilterError ), "Biomart.request is not handling Biomart filter errors correctly." )
209
- assert( attribute_error.is_a?( Biomart::AttributeError ), "Biomart.request is not handling Biomart attribute errors correctly." )
210
- assert( dataset_error.is_a?( Biomart::DatasetError ), "Biomart.request is not handling Biomart dataset errors correctly." )
211
- assert( general_error.is_a?( Biomart::BiomartError ), "Biomart.request is not handling general Biomart errors correctly." )
321
+ assert( general_error.is_a?(Biomart::BiomartError), "Biomart.request is not handling general Biomart errors correctly." )
212
322
  end
213
323
  end
214
324
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biomart
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ hash: 23
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
5
11
  platform: ruby
6
12
  authors:
7
13
  - Darren Oakley
@@ -9,40 +15,55 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-02-04 00:00:00 +00:00
18
+ date: 2010-06-10 00:00:00 +01:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: builder
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
23
32
  version: "0"
24
- version:
33
+ type: :runtime
34
+ version_requirements: *id001
25
35
  - !ruby/object:Gem::Dependency
26
- name: thoughtbot-shoulda
27
- type: :development
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
36
+ name: shoulda
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
30
40
  requirements:
31
41
  - - ">="
32
42
  - !ruby/object:Gem::Version
33
- version: "0"
34
- version:
43
+ hash: 23
44
+ segments:
45
+ - 2
46
+ - 10
47
+ version: "2.10"
48
+ type: :development
49
+ version_requirements: *id002
35
50
  - !ruby/object:Gem::Dependency
36
51
  name: hoe
37
- type: :development
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
40
55
  requirements:
41
56
  - - ">="
42
57
  - !ruby/object:Gem::Version
43
- version: 2.3.3
44
- version:
45
- description: A ruby API for interacting with Biomart services.
58
+ hash: 21
59
+ segments:
60
+ - 2
61
+ - 6
62
+ - 1
63
+ version: 2.6.1
64
+ type: :development
65
+ version_requirements: *id003
66
+ description: A ruby API for interacting with Biomart XML based webservices.
46
67
  email:
47
68
  - daz.oakley@gmail.com
48
69
  executables: []
@@ -68,8 +89,8 @@ files:
68
89
  - script/console
69
90
  - script/destroy
70
91
  - script/generate
71
- - tasks/metrics.task
72
- - tasks/shoulda.task
92
+ - tasks/metrics.rake
93
+ - tasks/shoulda.rake
73
94
  - test/test_biomart.rb
74
95
  - test/test_helper.rb
75
96
  has_rdoc: true
@@ -83,21 +104,27 @@ rdoc_options:
83
104
  require_paths:
84
105
  - lib
85
106
  required_ruby_version: !ruby/object:Gem::Requirement
107
+ none: false
86
108
  requirements:
87
109
  - - ">="
88
110
  - !ruby/object:Gem::Version
111
+ hash: 3
112
+ segments:
113
+ - 0
89
114
  version: "0"
90
- version:
91
115
  required_rubygems_version: !ruby/object:Gem::Requirement
116
+ none: false
92
117
  requirements:
93
118
  - - ">="
94
119
  - !ruby/object:Gem::Version
120
+ hash: 3
121
+ segments:
122
+ - 0
95
123
  version: "0"
96
- version:
97
124
  requirements: []
98
125
 
99
126
  rubyforge_project: biomart
100
- rubygems_version: 1.3.5
127
+ rubygems_version: 1.3.7
101
128
  signing_key:
102
129
  specification_version: 3
103
130
  summary: A ruby API for interacting with Biomart services.