biomart 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ === 0.1.0 2009-08-29
2
+
3
+ * 1 major enhancement:
4
+ * Initial release:
5
+ * Basic gathering of information about a Biomart server.
6
+ * Counting functionality.
7
+ * Basic searching of a single dataset.
@@ -0,0 +1,17 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ biomart.gemspec
6
+ lib/biomart.rb
7
+ lib/biomart/attribute.rb
8
+ lib/biomart/database.rb
9
+ lib/biomart/dataset.rb
10
+ lib/biomart/filter.rb
11
+ lib/biomart/server.rb
12
+ script/console
13
+ script/destroy
14
+ script/generate
15
+ tasks/shoulda.rake
16
+ test/test_biomart.rb
17
+ test/test_helper.rb
@@ -0,0 +1,126 @@
1
+ = Biomart
2
+
3
+ http://biomart.rubyforge.org
4
+
5
+ http://github.com/dazoakley/biomart
6
+
7
+ Biomart provides a simple interface for working with Biomart servers
8
+ (see http://www.biomart.org for more info on Biomart itself), so you
9
+ don't have to get down and dirty with the basic webservice calls yourself.
10
+
11
+ == Install
12
+
13
+ sudo gem install biomart
14
+
15
+ == Usage
16
+
17
+ Include the module in your code:
18
+
19
+ require "rubygems"
20
+ require "biomart"
21
+
22
+ To basically connect to a Biomart server and have access to all of its
23
+ information and meta data:
24
+
25
+ biomart = Biomart::Server.new( "http://www.biomart.org/biomart" )
26
+
27
+ # List all of the available datasets on this Biomart server
28
+ # (be patient - this will take a while on this server as there's
29
+ # a lot there...)
30
+ p biomart.list_datasets
31
+
32
+ # Grab the "kermits" dataset
33
+ kermits = biomart.datasets["kermits"]
34
+
35
+ # List its filters and attributes
36
+ p kermits.list_filters
37
+ p kermits.list_attributes
38
+
39
+ # Count the total number of records in the dataset
40
+ p kermits.count()
41
+
42
+ # Do a count with a filter added
43
+ p kermits.count( :filters => { "sponsor" => "EUCOMM" } )
44
+
45
+ # Do a search using the default filters and attributes
46
+ # - this will return a hash with :headers (an array of the headers)
47
+ # and :data (an array of arrays of results)
48
+ #
49
+ # Doing a search like this is generally a BAD idea as biomarts tend
50
+ # to hold a LOT of data... Unless you have time to kill...
51
+ p kermits.search()
52
+
53
+ # Do a search using some specific filters (but the default attributes)
54
+ p kermits.search( :filters => { "marker_symbol" => "Cbx1" } )
55
+
56
+ # Do a search with specific filters and attributes
57
+ p kermits.search(
58
+ :filters => { "marker_symbol" => "Cbx1" },
59
+ :attributes => ["marker_symbol"]
60
+ )
61
+
62
+ # If you would like to retrieve a more useful results object - i.e. an
63
+ # array of hashes, where each hash represents a row of results (keyed
64
+ # by the attribute name), add the :process_results argument
65
+ p kermits.search(
66
+ :filters => { "marker_symbol" => "Cbx1" },
67
+ :process_results => true
68
+ )
69
+
70
+ Or if you know the dataset you wish to work with and would like to
71
+ just get on with things...
72
+
73
+ htgt_targ = Biomart::Dataset.new(
74
+ "http://www.sanger.ac.uk/htgt/biomart",
75
+ { :name => "htgt_targ" }
76
+ )
77
+ p htgt_targ.count( :filters => { "is_eucomm" => "1" } )
78
+
79
+ # etc. etc.
80
+
81
+ See Biomart module and Class docs for more detail.
82
+
83
+ == Using a Proxy
84
+
85
+ If you need to channel all of your requests via a proxy, specify your
86
+ proxy via Biomart.proxy:
87
+
88
+ Biomart.proxy = "http://proxy.example.com/"
89
+
90
+ Now all requests made through Biomart will be proxied via proxy.example.com.
91
+
92
+ Alternatively you can also set your proxy url in the environment variable
93
+ 'http_proxy', and Biomart will automatically detect this.
94
+
95
+ == Meta
96
+
97
+ Written by Darren Oakley (daz dot oakley at gmail dot com)
98
+
99
+ http://biomart.rubyforge.org
100
+
101
+ http://github.com/dazoakley/biomart
102
+
103
+ == License
104
+
105
+ (The MIT License)
106
+
107
+ Copyright (c) 2009 Darren Oakley
108
+
109
+ Permission is hereby granted, free of charge, to any person obtaining
110
+ a copy of this software and associated documentation files (the
111
+ 'Software'), to deal in the Software without restriction, including
112
+ without limitation the rights to use, copy, modify, merge, publish,
113
+ distribute, sublicense, and/or sell copies of the Software, and to
114
+ permit persons to whom the Software is furnished to do so, subject to
115
+ the following conditions:
116
+
117
+ The above copyright notice and this permission notice shall be
118
+ included in all copies or substantial portions of the Software.
119
+
120
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
121
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
122
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
123
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
124
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
125
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
126
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
# Rakefile for the biomart gem. Hoe generates the packaging/release tasks;
# any extra project tasks are loaded from tasks/**/*.rake.
require "rubygems"
gem "hoe", ">= 2.1.0"
require "hoe"
require "fileutils"
require "./lib/biomart"

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# The summary and the description of the gem share the same text.
gem_blurb = "A ruby API for interacting with Biomart services."

# Generate all the Rake tasks
# Run "rake -T" to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec( "biomart" ) do
  developer "Darren Oakley", "daz.oakley@gmail.com"

  self.rubyforge_name   = name
  self.url              = "http://github.com/dazoakley/biomart"
  self.summary          = gem_blurb
  self.description      = gem_blurb
  self.extra_deps       = [["builder",">= 0"]]
  self.extra_dev_deps   = [["thoughtbot-shoulda",">=0"]]
  self.extra_rdoc_files = ["README.rdoc"]
end

require "newgem/tasks"
Dir.glob( "tasks/**/*.rake" ).each do |rake_file|
  load rake_file
end

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
@@ -0,0 +1,36 @@
1
# -*- encoding: utf-8 -*-

# Gem specification for biomart 0.1.0.
#
# Declares the package metadata, the packaged files and the dependencies:
# "builder" at runtime, "thoughtbot-shoulda" and "hoe" for development.
Gem::Specification.new do |s|
  s.name = %q{biomart}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Darren Oakley"]
  s.date = %q{2009-08-29}
  s.description = %q{A ruby API for interacting with Biomart services.}
  s.email = ["daz.oakley@gmail.com"]
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
  s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/shoulda.rake", "test/test_biomart.rb", "test/test_helper.rb"]
  s.homepage = %q{http://github.com/dazoakley/biomart}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded so the spec still loads on a RubyGems that does not provide
  # this setter.
  s.rubyforge_project = %q{biomart} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{A ruby API for interacting with Biomart services.}
  s.test_files = ["test/test_biomart.rb", "test/test_helper.rb"]

  # Gem::RubyGemsVersion is not defined on current RubyGems; prefer
  # Gem::VERSION and only fall back to the legacy constant when needed.
  installed_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

  if s.respond_to? :specification_version
    s.specification_version = 3

    if Gem::Version.new(installed_rubygems) >= Gem::Version.new('1.2.0')
      s.add_runtime_dependency(%q<builder>, [">= 0"])
      s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
      s.add_development_dependency(%q<hoe>, [">= 2.3.3"])
    else
      # RubyGems older than 1.2.0 has no runtime/development distinction.
      s.add_dependency(%q<builder>, [">= 0"])
    end
  else
    s.add_dependency(%q<builder>, [">= 0"])
  end
end
@@ -0,0 +1,94 @@
1
+ require "uri"
2
+ require "net/http"
3
+ require "rexml/document"
4
+ require "csv"
5
+
6
+ require "rubygems"
7
+ require "builder"
8
+
9
module Biomart
  VERSION = "0.1.0"

  # This is the base Biomart error/exception class. Rescue it if
  # you want to catch any exceptions that this code might raise.
  #
  # The value passed to the constructor (an HTTP status code or a
  # response body, depending on the caller) is kept in #data.
  class BiomartError < StandardError
    attr_reader :data

    def initialize(data)
      @data = data
      super
    end
  end

  # Error class representing HTTP errors.
  class HTTPError < BiomartError; end

  # Error class representing biomart filter errors. Usually raised
  # when a request is made for an incorrectly named (or non-existent)
  # filter.
  class FilterError < BiomartError; end

  # Error class representing biomart attribute errors. Usually raised
  # when a request is made for an incorrectly named (or non-existent)
  # attribute.
  class AttributeError < BiomartError; end

  # Error class representing biomart dataset errors. Usually raised
  # when a request is made for an incorrectly named (or non-existent)
  # dataset.
  class DatasetError < BiomartError; end

  @@url    = 'http://www.biomart.org/biomart/martservice'
  @@client = Net::HTTP

  # Centralised request function for handling all of the HTTP requests
  # to the biomart servers.
  #
  # params (hash):
  #
  # :url::    the URL to call (it is escaped before use)
  # :method:: 'post' to send :query as a form POST; anything else
  #           results in a GET
  # :query::  the value posted in the "query" form field
  #
  # Returns the response body as a string. Raises HTTPError for any
  # non-200 response, and FilterError, AttributeError, DatasetError or
  # BiomartError when the body contains "ERROR".
  #
  # The params hash is not modified.
  def request( params={} )
    client = biomart_http_client
    uri    = URI.parse( biomart_escape_url( params[:url] ) )

    res =
      if params[:method] == 'post'
        client.post_form( uri, { "query" => params[:query] } )
      else
        client.get_response( uri )
      end

    biomart_check_response( res )
    return res.body
  end

  class << self
    # Optional proxy URL (string) through which requests are sent.
    attr_accessor :proxy
  end

  private

  # Picks the HTTP client class for one request: a proxied client when
  # Biomart.proxy or ENV['http_proxy'] is set, the plain client otherwise.
  # Resolved on every call so that clearing the proxy takes effect.
  def biomart_http_client
    proxy_uri = Biomart.proxy || ENV['http_proxy']
    return @@client unless proxy_uri

    proxy = URI.parse( proxy_uri )
    Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
  end

  # Escapes characters that are not allowed in a URL. URI.escape no longer
  # exists on Ruby 3.0+, so the RFC 2396 parser is used where available.
  def biomart_escape_url( url )
    escaper = defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI
    escaper.escape( url )
  end

  # Inspects the response code/body and raises the matching error class.
  def biomart_check_response( res )
    unless res.code == "200"
      raise HTTPError.new(res.code), "HTTP error #{res.code}, please check your biomart server and URL settings."
    end

    body = res.body
    return unless body =~ /ERROR/

    if body =~ /Filter (.+) NOT FOUND/
      raise FilterError.new(body), "Biomart error. Filter #{$1} not found."
    elsif body =~ /Attribute (.+) NOT FOUND/
      raise AttributeError.new(body), "Biomart error. Attribute #{$1} not found."
    elsif body =~ /Dataset (.+) NOT FOUND/
      raise DatasetError.new(body), "Biomart error. Dataset #{$1} not found."
    else
      raise BiomartError.new(body), "Biomart error."
    end
  end

end
87
+
88
+ directory = File.expand_path(File.dirname(__FILE__))
89
+
90
+ require File.join(directory, 'biomart', 'server')
91
+ require File.join(directory, 'biomart', 'database')
92
+ require File.join(directory, 'biomart', 'dataset')
93
+ require File.join(directory, 'biomart', 'filter')
94
+ require File.join(directory, 'biomart', 'attribute')
@@ -0,0 +1,14 @@
1
module Biomart
  # Class representation for a biomart attribute.
  # Will belong to a Biomart::Dataset.
  class Attribute
    attr_reader :name, :display_name, :default

    # Builds the attribute from a hash-like object, reading the
    # "internalName", "displayName" and "default" keys. #default is
    # true whenever a "default" value is present.
    def initialize(args)
      @name         = args["internalName"]
      @display_name = args["displayName"]
      @default      = !!args["default"]
    end

  end
end
@@ -0,0 +1,60 @@
1
module Biomart
  # Class representation for a biomart database.
  # Will contain many Biomart::Dataset objects, and belong to a Biomart::Server.
  class Database
    include Biomart

    attr_reader :name, :display_name, :visible

    # url::  URL of the biomart server; "/martservice" is appended unless
    #        the URL already contains "martservice".
    # args:: hash-like object describing the database. Reads "name",
    #        "displayName" and "visible" (or the :name, :display_name and
    #        :visible symbol keys).
    #
    # Raises ArgumentError when no url is given.
    def initialize( url, args )
      @url = url || raise( ArgumentError, "must pass :url" )
      @url = @url + "/martservice" unless @url =~ /martservice/

      @name         = args["name"] || args[:name]
      @display_name = args["displayName"] || args[:display_name]
      @visible      = ( args["visible"] || args[:visible] ) ? true : false
      @datasets     = {}
    end

    # Returns an array of the dataset names (biomart 'name')
    # for this database.
    def list_datasets
      fetch_datasets if @datasets.empty?
      return @datasets.keys
    end

    # Returns a hash (keyed by the biomart 'name' for the dataset)
    # of all of the Biomart::Dataset objects belonging to this database.
    def datasets
      fetch_datasets if @datasets.empty?
      return @datasets
    end

    private

    # Utility method to do the webservice call to the biomart server
    # and collate/build the information about the datasets.
    #
    # The response is parsed as tab-separated rows; column 1 is used as
    # the dataset name, column 2 as its display name and column 3 as the
    # visibility flag. Only rows flagged "1" are kept.
    def fetch_datasets
      url      = @url + "?type=datasets&mart=#{@name}"
      document = request( :url => url )

      # The separator must be given as the :col_sep option; a bare "\t"
      # second argument is not accepted by CSV.parse on Ruby 1.9 and later.
      CSV.parse( document, :col_sep => "\t" ).each do |row|
        next unless row[1] && row[3] == "1"

        dataset_attr = {
          "name"        => row[1],
          "displayName" => row[2],
          "visible"     => row[3]
        }
        @datasets[ dataset_attr["name"] ] = Dataset.new( @url, dataset_attr )
      end
    end

  end
end
@@ -0,0 +1,209 @@
1
module Biomart
  # Class representation for a biomart dataset.
  # Can belong to a Biomart::Database and a Biomart::Server.
  class Dataset
    include Biomart

    attr_reader :name, :display_name, :visible

    # url::  URL of the biomart server; "/martservice" is appended unless
    #        the URL already contains "martservice".
    # args:: hash-like object describing the dataset. Reads "name",
    #        "displayName" and "visible" (or the :name, :display_name and
    #        :visible symbol keys).
    #
    # Raises ArgumentError when no url is given.
    def initialize( url, args )
      @url = url || raise( ArgumentError, "must pass :url" )
      @url = @url + "/martservice" unless @url =~ /martservice/

      @name         = args["name"] || args[:name]
      @display_name = args["displayName"] || args[:display_name]
      @visible      = ( args["visible"] || args[:visible] ) ? true : false

      @filters     = {}
      @attributes  = {}
      @importables = {}
      @exportables = {}
    end

    # Returns a hash (keyed by the biomart 'internal_name' for the filter)
    # of all of the Biomart::Filter objects belonging to this dataset.
    def filters
      fetch_configuration if @filters.empty?
      return @filters
    end

    # Returns an array of the filter names (biomart 'internal_name')
    # for this dataset.
    def list_filters
      fetch_configuration if @filters.empty?
      return @filters.keys
    end

    # Returns a hash (keyed by the biomart 'internal_name' for the attribute)
    # of all of the Biomart::Attribute objects belonging to this dataset.
    def attributes
      fetch_configuration if @attributes.empty?
      return @attributes
    end

    # Returns an array of the attribute names (biomart 'internal_name')
    # for this dataset.
    def list_attributes
      fetch_configuration if @attributes.empty?
      return @attributes.keys
    end

    # Function to perform a Biomart count. Returns an integer value for
    # the result of the count query.
    #
    # optional arguments:
    #
    # :filters:: hash of key-value pairs (filter => search term)
    #
    # The args hash passed in is left untouched; the count flag is added
    # to a copy.
    def count( args={} )
      count_args = args.merge( :count => "1" )
      result = request( :method => 'post', :url => @url, :query => generate_xml(count_args) )
      return result.to_i
    end

    # Function to perform a Biomart search.
    #
    # optional arguments:
    #
    # :filters::          hash of key-value pairs (filter => search term)
    # :attributes::       array of attributes to retrieve
    # :process_results::  true/false - convert search results to object
    #
    # By default will return a hash with the following:
    #
    # :headers::    array of headers
    # :data::       array of arrays containing search results
    #
    # But with the :process_results option will return an array of hashes,
    # where each hash represents a row of results (keyed by the attribute name).
    def search( args={} )
      response = request( :method => 'post', :url => @url, :query => generate_xml(args) )
      result   = process_tsv( args, response )
      result   = conv_results_to_a_of_h( result ) if args[:process_results]
      return result
    end

    # Utility function to build the Biomart query XML.
    #
    # Uses the :filters given in args, or every filter of the dataset that
    # is flagged as a default (with its default value) when none are given.
    # Attributes are only added when args[:count] is not set: either the
    # :attributes given, or the dataset's default attributes.
    def generate_xml( args={} )
      biomart_xml = ""
      xml = Builder::XmlMarkup.new( :target => biomart_xml, :indent => 2 )

      xml.instruct!
      xml.declare!( :DOCTYPE, :Query )
      xml.Query( :virtualSchemaName => "default", :formatter => "TSV", :header => "0", :uniqueRows => "1", :count => args[:count], :datasetConfigVersion => "0.6" ) {
        xml.Dataset( :name => @name, :interface => "default" ) {

          if args[:filters]
            args[:filters].each do |name,value|
              # Several values for one filter are sent as a comma separated list.
              value = value.join(",") if value.is_a? Array
              xml.Filter( :name => name, :value => value )
            end
          else
            self.filters.each do |name,filter|
              xml.Filter( :name => name, :value => filter.default_value ) if filter.default
            end
          end

          unless args[:count]
            if args[:attributes]
              args[:attributes].each do |name|
                xml.Attribute( :name => name )
              end
            else
              self.attributes.each do |name,attribute|
                xml.Attribute( :name => name ) if attribute.default
              end
            end
          end

        }
      }

      return biomart_xml
    end

    private

    # Utility function to retrieve and process the configuration
    # xml for a dataset
    def fetch_configuration
      url      = @url + "?type=configuration&dataset=#{@name}"
      document = REXML::Document.new( request( :url => url ) )

      # Top-Level filters...
      REXML::XPath.each( document, '//FilterDescription' ) do |f|
        unless f.attributes["displayType"].eql? "container"
          @filters[ f.attributes["internalName"] ] = Filter.new( f.attributes )
        end
      end

      # Filters nested inside containers...
      REXML::XPath.each( document, '//FilterDescription/Option' ) do |f|
        if f.attributes["displayType"] != nil
          @filters[ f.attributes["internalName"] ] = Filter.new( f.attributes )
        end
      end

      # Attributes are much simpler...
      REXML::XPath.each( document, '//AttributeDescription' ) do |a|
        @attributes[ a.attributes["internalName"] ] = Attribute.new( a.attributes )
      end
    end

    # Utility function to transform the tab-separated data retrieved
    # from the Biomart search query into a ruby object.
    #
    # The headers are the requested :attributes, or the names of the
    # dataset's default attributes when none were requested.
    def process_tsv( args, tsv )
      headers = []

      if args[:attributes]
        args[:attributes].each { |attribute| headers.push(attribute) }
      else
        self.attributes.each do |name,attribute|
          headers.push(name) if attribute.default
        end
      end

      return {
        :headers => headers,
        # The separator must be given as the :col_sep option; a bare "\t"
        # second argument is not accepted by CSV.parse on Ruby 1.9 and later.
        :data    => CSV.parse( tsv, :col_sep => "\t" )
      }
    end

    # Utility function to quickly convert a search result into an array of hashes
    # (keyed by the attribute name) for easier processing - this is not done by
    # default on all searches as this can cause a large overhead on big data returns.
    def conv_results_to_a_of_h( search_results )
      headers = search_results[:headers]

      return search_results[:data].map do |row|
        record = {}
        row.each_with_index { |value,index| record[ headers[index] ] = value }
        record
      end
    end

  end
end
@@ -0,0 +1,15 @@
1
module Biomart
  # Class representation for a biomart filter.
  # Will belong to a Biomart::Dataset.
  class Filter
    attr_reader :name, :display_name, :default, :default_value

    # Builds the filter from a hash-like object, reading the
    # "internalName", "displayName", "defaultOn" and "defaultValue" keys.
    # #default is true whenever a "defaultOn" value is present.
    def initialize(args)
      @name          = args["internalName"]
      @display_name  = args["displayName"]
      @default       = !!args["defaultOn"]
      @default_value = args["defaultValue"]
    end

  end
end
@@ -0,0 +1,78 @@
1
module Biomart
  # Class representation for a biomart server.
  # Will contain many Biomart::Database and Biomart::Dataset objects.
  class Server
    include Biomart

    attr_reader :url

    # url:: URL of the biomart server; "/martservice" is appended unless
    #       the URL already contains "martservice".
    #
    # Raises ArgumentError when no url is given.
    def initialize( url )
      raise ArgumentError, "must pass :url" unless url

      @url = url
      @url = @url + "/martservice" unless @url =~ /martservice/

      @databases = {}
      @datasets  = {}
    end

    # Returns an array of the database names (biomart 'name')
    # for this server.
    def list_databases
      return databases.keys
    end

    # Returns a hash (keyed by the biomart 'name' for the database)
    # of all of the Biomart::Database objects belonging to this server.
    def databases
      fetch_databases if @databases.empty?
      return @databases
    end

    # Returns an array of the dataset names (biomart 'name')
    # for this server.
    def list_datasets
      return datasets.keys
    end

    # Returns a hash (keyed by the biomart 'name' for the dataset)
    # of all of the Biomart::Dataset objects belonging to this server.
    def datasets
      fetch_datasets if @datasets.empty?
      return @datasets
    end

    private

    # Utility method to do the webservice call to the biomart server
    # and collate/build the information about the databases. Only the
    # registry entries whose "visible" attribute is "1" are kept.
    def fetch_databases
      registry = REXML::Document.new( request( :url => @url + '?type=registry' ) )

      REXML::XPath.each( registry, "//MartURLLocation" ) do |location|
        details = location.attributes
        next unless details["visible"] == "1"

        @databases[ details["name"] ] = Database.new( @url, details )
      end
    end

    # Utility function to collate all of the Biomart::Dataset objects
    # contained within the Biomart::Database objects.
    def fetch_datasets
      databases.each_value do |database|
        @datasets.update( database.datasets )
      end
    end

  end
end
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Starts an irb session with the biomart library already loaded.

# Windows builds of ruby ship irb as a batch file.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs =  " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/biomart.rb'}"
puts "Loading biomart gem"
exec "#{irb} #{libs} --simple-prompt"
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script from the gem's root directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading rubygems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Destroy.new.run(ARGV)
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script from the gem's root directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading rubygems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,15 @@
1
# Rake task that runs the gem's shoulda tests.
begin
  require 'shoulda'
rescue LoadError
  require 'rubygems' unless ENV['NO_RUBYGEMS']
  require 'shoulda'
end

require 'rake/testtask'

desc "Run the shoulda tests under /test"
Rake::TestTask.new do |t|
  t.libs << "test"
  # The tests ship as test/test_*.rb (see Manifest.txt); the previous
  # 'spec/*_spec.rb' pattern matched no files, so nothing was run.
  # test_helper.rb only sets up the load path and is not a test itself.
  t.test_files = FileList['test/test_*.rb'].exclude('test/test_helper.rb')
  t.verbose = true
end
@@ -0,0 +1,145 @@
1
require File.join(File.dirname(__FILE__), 'test_helper.rb')

# Tests for the Biomart library. They talk to the live biomart server at
# http://www.sanger.ac.uk/htgt/biomart.
class BiomartTest < Test::Unit::TestCase
  def setup
    @htgt     = Biomart::Server.new('http://www.sanger.ac.uk/htgt/biomart')
    @htgt_alt = Biomart::Server.new('http://www.sanger.ac.uk/htgt/biomart')
  end

  context "A Biomart::Server instance" do
    should "have databases" do
      database_names = @htgt_alt.list_databases
      assert( database_names.is_a?( Array ), "Biomart::Server.list_databases is not returning an array." )
      assert( database_names.include?( "htgt" ), "Biomart::Server datbase parsing is off - htgt is not in htgt!" )
      assert( @htgt.databases["htgt"].is_a?( Biomart::Database ), "Biomart::Server is not creating Biomart::Database objects." )
    end

    should "have datasets" do
      dataset_names = @htgt_alt.list_datasets
      assert( dataset_names.is_a?( Array ), "Biomart::Server.list_datasets is not returning an array." )
      assert( dataset_names.include?( "htgt_targ" ), "Biomart::Server dataset parsing is off - htgt_targ is not in htgt!" )
      assert( @htgt.datasets["htgt_targ"].is_a?( Biomart::Dataset ), "Biomart::Server is not creating Biomart::Dataset objects." )
    end
  end

  context "A Biomart::Database instance" do
    setup do
      @htgt_database = @htgt.databases["htgt"]
    end

    should "have basic metadata" do
      assert( @htgt_database.display_name, "Biomart::Database does not have a 'display name'." )
      assert( @htgt_database.name, "Biomart::Database does not have a 'name'." )
      assert( @htgt_database.visible != nil, "Biomart::Database does not have a 'visible' flag." )
    end

    should "have datasets" do
      dataset_names = @htgt_database.list_datasets
      assert( dataset_names.is_a?( Array ), "Biomart::Database.list_datasets is not returning an array." )
      assert( dataset_names.include?( "htgt_targ" ), "Biomart::Database dataset parsing is off - htgt_targ is not in htgt!" )
      assert( @htgt_database.datasets["htgt_targ"].is_a?( Biomart::Dataset ), "Biomart::Database is not creating Biomart::Dataset objects." )
    end
  end

  context "A Biomart::Dataset instance" do
    setup do
      all_datasets = @htgt.datasets
      @htgt_targ   = all_datasets["htgt_targ"]
      @htgt_trap   = all_datasets["htgt_trap"]
      @kermits     = all_datasets["kermits"]
    end

    should "have basic metadata" do
      assert( @htgt_targ.display_name, "Biomart::Dataset does not have a 'display name'." )
      assert( @htgt_targ.name, "Biomart::Dataset does not have a 'name'." )
      assert( @htgt_targ.visible != nil, "Biomart::Dataset does not have a 'visible' flag." )
    end

    should "have filters" do
      filter_names = @htgt_targ.list_filters
      assert( filter_names.is_a?( Array ), "Biomart::Dataset.list_filters is not returning an array." )
      assert( filter_names.include?( "ensembl_gene_id" ), "Biomart::Dataset filter parsing is off - ensembl_gene_id is not in htgt_targ!" )
      assert( @kermits.filters["ensembl_gene_id"].is_a?( Biomart::Filter ), "Biomart::Dataset is not creating Biomart::Filter objects." )
    end

    should "have attributes" do
      attribute_names = @htgt_targ.list_attributes
      assert( attribute_names.is_a?( Array ), "Biomart::Dataset.list_attributes is not returning an array." )
      assert( attribute_names.include?( "ensembl_gene_id" ), "Biomart::Dataset attribute parsing is off - ensembl_gene_id is not in htgt_targ!" )
      assert( @kermits.attributes["ensembl_gene_id"].is_a?( Biomart::Attribute ), "Biomart::Dataset is not creating Biomart::Attribute objects." )
    end

    should "perform count queries" do
      # No filters at all.
      total = @htgt_targ.count()
      assert( total.is_a?( Integer ), "Biomart::Dataset.count is not returning integers." )
      assert( total > 0, "Biomart::Dataset.count is returning zero - this is wrong!" )

      # A single filter with a single value.
      eucomm_total = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
      assert( eucomm_total.is_a?( Integer ), "Biomart::Dataset.count (with single filter) is not returning integers." )
      assert( eucomm_total > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!" )

      # A single filter with a group of values.
      marker_total = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
      assert( marker_total.is_a?( Integer ), "Biomart::Dataset.count (with single filter, group value) is not returning integers." )
      assert( marker_total > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!" )
    end

    should "perform search queries" do
      # Default filters and attributes.
      plain = @htgt_trap.search()
      assert( plain.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
      assert( plain[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )

      # Filters only, with result processing.
      filtered = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
      assert( filtered.is_a?( Array ), "Biomart::Dataset.search (filters defined with processing) is not returning an array." )
      assert( filtered.first.is_a?( Hash ), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes." )
      assert( filtered.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info." )

      # Filters and attributes, with result processing.
      narrowed = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
      assert( narrowed.is_a?( Array ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array." )
      assert( narrowed.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
      assert( narrowed.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
    end
  end

  context "The Biomart module" do
    setup do
      @not_biomart = Biomart::Server.new('http://www.sanger.ac.uk')
      @htgt_targ   = @htgt.datasets["htgt_targ"]
      @bad_dataset = Biomart::Dataset.new( "http://www.sanger.ac.uk/htgt/biomart", { :name => "wibble" } )
    end

    should "handle user/configuration errors (i.e. incorrect URLs etc)" do
      http_error = capture_error( Biomart::HTTPError ) { @not_biomart.list_databases }

      assert( http_error.is_a?( Biomart::HTTPError ), "Biomart.request is not processing HTTP errors correctly." )
    end

    should "handle biomart server errors gracefully" do
      filter_error    = capture_error( Biomart::FilterError )    { @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } ) }
      attribute_error = capture_error( Biomart::AttributeError ) { @htgt_targ.search( :attributes => ["wibbleblibbleblip"] ) }
      dataset_error   = capture_error( Biomart::DatasetError )   { @bad_dataset.count() }
      general_error   = capture_error( Biomart::BiomartError )   { @bad_dataset.count() }

      assert( filter_error.is_a?( Biomart::FilterError ), "Biomart.request is not handling Biomart filter errors correctly." )
      assert( attribute_error.is_a?( Biomart::AttributeError ), "Biomart.request is not handling Biomart attribute errors correctly." )
      assert( dataset_error.is_a?( Biomart::DatasetError ), "Biomart.request is not handling Biomart dataset errors correctly." )
      assert( general_error.is_a?( Biomart::BiomartError ), "Biomart.request is not handling general Biomart errors correctly." )
    end
  end

  private

  # Runs the block and returns the exception it raised when that exception
  # is an error_class; returns nil when nothing was raised. Exceptions of
  # any other class are not rescued.
  def capture_error( error_class )
    yield
    nil
  rescue error_class => e
    e
  end
end
@@ -0,0 +1,9 @@
1
# Shared set-up for the test suite: loads shoulda and the biomart library.
begin
  require 'shoulda'
rescue LoadError
  require 'rubygems' unless ENV['NO_RUBYGEMS']
  require 'shoulda'
end

# Make the gem's lib directory loadable without installing the gem.
$:.unshift(File.dirname(__FILE__) + '/../lib')
# The library file is lib/biomart.rb; requiring 'Biomart' only works on
# case-insensitive filesystems.
require 'biomart'
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: biomart
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Darren Oakley
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-29 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: builder
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: thoughtbot-shoulda
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: hoe
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 2.3.3
44
+ version:
45
+ description: A ruby API for interacting with Biomart services.
46
+ email:
47
+ - daz.oakley@gmail.com
48
+ executables: []
49
+
50
+ extensions: []
51
+
52
+ extra_rdoc_files:
53
+ - History.txt
54
+ - Manifest.txt
55
+ - README.rdoc
56
+ files:
57
+ - History.txt
58
+ - Manifest.txt
59
+ - README.rdoc
60
+ - Rakefile
61
+ - biomart.gemspec
62
+ - lib/biomart.rb
63
+ - lib/biomart/attribute.rb
64
+ - lib/biomart/database.rb
65
+ - lib/biomart/dataset.rb
66
+ - lib/biomart/filter.rb
67
+ - lib/biomart/server.rb
68
+ - script/console
69
+ - script/destroy
70
+ - script/generate
71
+ - tasks/shoulda.rake
72
+ - test/test_biomart.rb
73
+ - test/test_helper.rb
74
+ has_rdoc: true
75
+ homepage: http://github.com/dazoakley/biomart
76
+ licenses: []
77
+
78
+ post_install_message:
79
+ rdoc_options:
80
+ - --main
81
+ - README.rdoc
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: "0"
89
+ version:
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: "0"
95
+ version:
96
+ requirements: []
97
+
98
+ rubyforge_project: biomart
99
+ rubygems_version: 1.3.5
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: A ruby API for interacting with Biomart services.
103
+ test_files:
104
+ - test/test_biomart.rb
105
+ - test/test_helper.rb