biomart 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ === 0.1.0 2009-08-29
2
+
3
+ * 1 major enhancement:
4
+ * Initial release:
5
+ * Basic gathering of information about a Biomart server.
6
+ * Counting functionality.
7
+ * Basic searching of a single dataset.
@@ -0,0 +1,17 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ biomart.gemspec
6
+ lib/biomart.rb
7
+ lib/biomart/attribute.rb
8
+ lib/biomart/database.rb
9
+ lib/biomart/dataset.rb
10
+ lib/biomart/filter.rb
11
+ lib/biomart/server.rb
12
+ script/console
13
+ script/destroy
14
+ script/generate
15
+ tasks/shoulda.rake
16
+ test/test_biomart.rb
17
+ test/test_helper.rb
@@ -0,0 +1,126 @@
1
+ = Biomart
2
+
3
+ http://biomart.rubyforge.org
4
+
5
+ http://github.com/dazoakley/biomart
6
+
7
+ Biomart provides a simple interface for working with Biomart servers
8
+ (see http://www.biomart.org for more info on Biomart itself), so you
9
+ don't have to get down and dirty with the basic webservice calls yourself.
10
+
11
+ == Install
12
+
13
+ sudo gem install biomart
14
+
15
+ == Usage
16
+
17
+ Include the module in your code:
18
+
19
+ require "rubygems"
20
+ require "biomart"
21
+
22
+ To connect to a Biomart server and have access to all of its
23
+ information and meta data:
24
+
25
+ biomart = Biomart::Server.new( "http://www.biomart.org/biomart" )
26
+
27
+ # List all of the available datasets on this Biomart server
28
+ # (be patient - this will take a while on this server as there's
29
+ # a lot there...)
30
+ p biomart.list_datasets
31
+
32
+ # Grab the "kermits" dataset
33
+ kermits = biomart.datasets["kermits"]
34
+
35
+ # List its filters and attributes
36
+ p kermits.list_filters
37
+ p kermits.list_attributes
38
+
39
+ # Count the total number of records in the dataset
40
+ p kermits.count()
41
+
42
+ # Do a count with a filter added
43
+ p kermits.count( :filters => { "sponsor" => "EUCOMM" } )
44
+
45
+ # Do a search using the default filters and attributes
46
+ # - this will return a hash with :headers (an array of the headers)
47
+ # and :data (an array of arrays of results)
48
+ #
49
+ # Doing a search like this is generally a BAD idea as biomarts tend
50
+ # to hold a LOT of data... Unless you have time to kill...
51
+ p kermits.search()
52
+
53
+ # Do a search using some specific filters (but the default attributes)
54
+ p kermits.search( :filters => { "marker_symbol" => "Cbx1" } )
55
+
56
+ # Do a search with specific filters and attributes
57
+ p kermits.search(
58
+ :filters => { "marker_symbol" => "Cbx1" },
59
+ :attributes => ["marker_symbol"]
60
+ )
61
+
62
+ # If you would like to retrieve a more useful results object - i.e. an
63
+ # array of hashes, where each hash represents a row of results (keyed
64
+ # by the attribute name), add the :process_results argument
65
+ p kermits.search(
66
+ :filters => { "marker_symbol" => "Cbx1" },
67
+ :process_results => true
68
+ )
69
+
70
+ Or if you know the dataset you wish to work with and would like to
71
+ just get on with things...
72
+
73
+ htgt_targ = Biomart::Dataset.new(
74
+ "http://www.sanger.ac.uk/htgt/biomart",
75
+ { :name => "htgt_targ" }
76
+ )
77
+ p htgt_targ.count( :filters => { "is_eucomm" => "1" } )
78
+
79
+ # etc. etc.
80
+
81
+ See Biomart module and Class docs for more detail.
82
+
83
+ == Using a Proxy
84
+
85
+ If you need to channel all of your requests via a proxy, specify your
86
+ proxy via Biomart.proxy:
87
+
88
+ Biomart.proxy = "http://proxy.example.com/"
89
+
90
+ Now all requests made through Biomart will be proxied via proxy.example.com.
91
+
92
+ Alternatively you can also set your proxy url in the environment variable
93
+ 'http_proxy', and Biomart will automatically detect this.
94
+
95
+ == Meta
96
+
97
+ Written by Darren Oakley (daz dot oakley at gmail dot com)
98
+
99
+ http://biomart.rubyforge.org
100
+
101
+ http://github.com/dazoakley/biomart
102
+
103
+ == License
104
+
105
+ (The MIT License)
106
+
107
+ Copyright (c) 2009 Darren Oakley
108
+
109
+ Permission is hereby granted, free of charge, to any person obtaining
110
+ a copy of this software and associated documentation files (the
111
+ 'Software'), to deal in the Software without restriction, including
112
+ without limitation the rights to use, copy, modify, merge, publish,
113
+ distribute, sublicense, and/or sell copies of the Software, and to
114
+ permit persons to whom the Software is furnished to do so, subject to
115
+ the following conditions:
116
+
117
+ The above copyright notice and this permission notice shall be
118
+ included in all copies or substantial portions of the Software.
119
+
120
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
121
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
122
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
123
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
124
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
125
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
126
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ require "rubygems"
2
+ gem "hoe", ">= 2.1.0"
3
+ require "hoe"
4
+ require "fileutils"
5
+ require "./lib/biomart"
6
+
7
# Hoe plugins in use; the commented-out ones are currently disabled.
Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Declare the gem. Hoe generates the Rake tasks from this specification;
# run "rake -T" from the gem root directory to list them.
$hoe = Hoe.spec "biomart" do
  developer "Darren Oakley", "daz.oakley@gmail.com"

  self.rubyforge_name   = name
  self.url              = "http://github.com/dazoakley/biomart"
  self.summary          = "A ruby API for interacting with Biomart services."
  self.description      = "A ruby API for interacting with Biomart services."
  self.extra_deps       = [["builder",">= 0"]]
  self.extra_dev_deps   = [["thoughtbot-shoulda",">=0"]]
  self.extra_rdoc_files = ["README.rdoc"]
end

require "newgem/tasks"

# Pull in every project-specific task definition found under tasks/.
Dir.glob("tasks/**/*.rake").each do |rake_file|
  load rake_file
end

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
@@ -0,0 +1,36 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for biomart 0.1.0.
#
# The dependency section picks the most specific declaration style the
# running RubyGems supports: typed runtime/development dependencies on
# RubyGems >= 1.2.0, plain add_dependency otherwise.
Gem::Specification.new do |s|
  s.name = %q{biomart}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Darren Oakley"]
  s.date = %q{2009-08-29}
  s.description = %q{A ruby API for interacting with Biomart services.}
  s.email = ["daz.oakley@gmail.com"]
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
  s.files = ["History.txt", "Manifest.txt", "README.rdoc", "Rakefile", "biomart.gemspec", "lib/biomart.rb", "lib/biomart/attribute.rb", "lib/biomart/database.rb", "lib/biomart/dataset.rb", "lib/biomart/filter.rb", "lib/biomart/server.rb", "script/console", "script/destroy", "script/generate", "tasks/shoulda.rake", "test/test_biomart.rb", "test/test_helper.rb"]
  s.homepage = %q{http://github.com/dazoakley/biomart}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Only set where the running RubyGems still offers this writer.
  s.rubyforge_project = %q{biomart} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{A ruby API for interacting with Biomart services.}
  s.test_files = ["test/test_biomart.rb", "test/test_helper.rb"]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is the old name for the RubyGems version string;
    # prefer Gem::VERSION and fall back to the old constant only when the
    # new one is not defined.
    running_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(running_rubygems) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<builder>, [">= 0"])
      s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
      s.add_development_dependency(%q<hoe>, [">= 2.3.3"])
    else
      s.add_dependency(%q<builder>, [">= 0"])
    end
  else
    s.add_dependency(%q<builder>, [">= 0"])
  end
end
@@ -0,0 +1,94 @@
1
+ require "uri"
2
+ require "net/http"
3
+ require "rexml/document"
4
+ require "csv"
5
+
6
+ require "rubygems"
7
+ require "builder"
8
+
9
module Biomart
  VERSION = "0.1.0"

  # This is the base Biomart error/exception class. Rescue it if
  # you want to catch any exceptions that this code might raise.
  #
  # The value the error was built with is kept in +data+: the HTTP status
  # code for HTTPError, the raw response body for the other error classes.
  class BiomartError < StandardError
    attr_reader :data

    def initialize(data)
      @data = data
      super
    end
  end

  # Error class representing HTTP errors.
  class HTTPError < BiomartError; end

  # Error class representing biomart filter errors. Usually raised
  # when a request is made for an incorrectly named (or non-existent)
  # filter.
  class FilterError < BiomartError; end

  # Error class representing biomart attribute errors. Usually raised
  # when a request is made for an incorrectly named (or non-existent)
  # attribute.
  class AttributeError < BiomartError; end

  # Error class representing biomart dataset errors. Usually raised
  # when a request is made for an incorrectly named (or non-existent)
  # dataset.
  class DatasetError < BiomartError; end

  @@url    = 'http://www.biomart.org/biomart/martservice'
  @@client = Net::HTTP

  # Centralised request function for handling all of the HTTP requests
  # to the biomart servers.
  #
  # arguments (all passed in the +params+ hash):
  #
  # :url::    the address to contact (escaped before use)
  # :method:: 'post' to send a form post; anything else performs a GET
  # :query::  the value sent as the "query" form field (post only)
  #
  # Returns the response body as a string.
  #
  # Raises HTTPError when the response code is not "200", and
  # FilterError / AttributeError / DatasetError / BiomartError when the
  # body contains the text "ERROR".
  #
  # The +params+ hash is left untouched.
  def request( params={} )
    client = biomart_http_client
    target = URI.parse( biomart_escape_url( params[:url] ) )

    res =
      if params[:method] == 'post'
        client.post_form( target, { "query" => params[:query] } )
      else
        client.get_response( target )
      end

    # Process the response code/body to catch errors.
    if res.code != "200"
      raise HTTPError.new(res.code), "HTTP error #{res.code}, please check your biomart server and URL settings."
    end

    biomart_raise_on_error_body( res.body )
    res.body
  end

  class << self
    # Proxy URL (a string) used for all requests, or nil for none.
    attr_accessor :proxy
  end

  private

  # Picks the HTTP client for one request. The proxy settings are looked
  # up on every call (Biomart.proxy first, then the 'http_proxy'
  # environment variable) so that clearing them takes effect again.
  def biomart_http_client
    proxy_uri = Biomart.proxy || ENV['http_proxy']
    return @@client unless proxy_uri

    proxy = URI.parse( proxy_uri )
    Net::HTTP::Proxy( proxy.host, proxy.port )
  end

  # Percent-escapes characters that are not allowed in a URL. URI.escape
  # is only used as a last resort because newer Rubies no longer have it.
  def biomart_escape_url( url )
    if defined?(URI::RFC2396_Parser)
      URI::RFC2396_Parser.new.escape( url )
    elsif defined?(URI::DEFAULT_PARSER)
      URI::DEFAULT_PARSER.escape( url )
    else
      URI.escape( url )
    end
  end

  # Turns an error report found in a response body into the matching
  # exception. Does nothing when the body does not mention "ERROR".
  def biomart_raise_on_error_body( body )
    return unless body =~ /ERROR/

    case body
    when /Filter (.+) NOT FOUND/
      raise FilterError.new(body), "Biomart error. Filter #{$1} not found."
    when /Attribute (.+) NOT FOUND/
      raise AttributeError.new(body), "Biomart error. Attribute #{$1} not found."
    when /Dataset (.+) NOT FOUND/
      raise DatasetError.new(body), "Biomart error. Dataset #{$1} not found."
    else
      raise BiomartError.new(body), "Biomart error."
    end
  end

end
87
+
88
+ directory = File.expand_path(File.dirname(__FILE__))
89
+
90
+ require File.join(directory, 'biomart', 'server')
91
+ require File.join(directory, 'biomart', 'database')
92
+ require File.join(directory, 'biomart', 'dataset')
93
+ require File.join(directory, 'biomart', 'filter')
94
+ require File.join(directory, 'biomart', 'attribute')
@@ -0,0 +1,14 @@
1
module Biomart
  # A single attribute (result column) of a Biomart::Dataset.
  #
  # Built from a hash-like object keyed by the strings "internalName",
  # "displayName" and "default".
  class Attribute
    attr_reader :name, :display_name, :default

    def initialize(args)
      @name         = args["internalName"]
      @display_name = args["displayName"]
      # Any non-nil, non-false "default" entry marks the attribute as a default one.
      @default      = !!args["default"]
    end
  end
end
@@ -0,0 +1,60 @@
1
module Biomart
  # Class representation for a biomart database.
  # Will contain many Biomart::Dataset objects, and belong to a Biomart::Server.
  class Database
    include Biomart

    attr_reader :name, :display_name, :visible

    # url::  address of the biomart server; "/martservice" is appended
    #        unless the url already mentions it
    # args:: hash-like description of the database, read either with the
    #        string keys "name", "displayName", "visible" or the symbol
    #        keys :name, :display_name, :visible
    #
    # Raises ArgumentError when no url is given.
    def initialize( url, args )
      raise ArgumentError, "must pass :url" unless url
      @url = url
      @url = @url + "/martservice" unless @url =~ /martservice/

      @name         = args["name"] || args[:name]
      @display_name = args["displayName"] || args[:display_name]
      @visible      = ( args["visible"] || args[:visible] ) ? true : false
      @datasets     = {}
    end

    # Returns an array of the dataset names (biomart 'name')
    # for this database.
    def list_datasets
      datasets.keys
    end

    # Returns a hash (keyed by the biomart 'name' for the dataset)
    # of all of the Biomart::Dataset objects belonging to this database.
    # The server is queried whenever the hash is still empty.
    def datasets
      fetch_datasets if @datasets.empty?
      @datasets
    end

    private

    # Utility method to do the webservice call to the biomart server
    # and collate/build the information about the datasets.
    #
    # The response is tab-separated; column 1 is used as the dataset name,
    # column 2 as its display name and column 3 as the visibility flag.
    # Only rows whose flag is "1" are kept.
    def fetch_datasets
      url      = @url + "?type=datasets&mart=#{@name}"
      document = request( :url => url )

      # The separator has to be given as the :col_sep option; a bare
      # positional "\t" is rejected by the CSV library of Ruby 1.9+.
      CSV.parse( document, :col_sep => "\t" ).each do |row|
        next unless row[1] && row[3] == "1"

        dataset_attr = {
          "name"        => row[1],
          "displayName" => row[2],
          "visible"     => row[3]
        }
        @datasets[ dataset_attr["name"] ] = Dataset.new( @url, dataset_attr )
      end
    end

  end
end
@@ -0,0 +1,209 @@
1
module Biomart
  # Class representation for a biomart dataset.
  # Can belong to a Biomart::Database and a Biomart::Server.
  class Dataset
    include Biomart

    attr_reader :name, :display_name, :visible

    # url::  address of the biomart server; "/martservice" is appended
    #        unless the url already mentions it
    # args:: hash-like description of the dataset, read either with the
    #        string keys "name", "displayName", "visible" or the symbol
    #        keys :name, :display_name, :visible
    #
    # Raises ArgumentError when no url is given.
    def initialize( url, args )
      raise ArgumentError, "must pass :url" unless url
      @url = url
      @url = @url + "/martservice" unless @url =~ /martservice/

      @name         = args["name"] || args[:name]
      @display_name = args["displayName"] || args[:display_name]
      @visible      = ( args["visible"] || args[:visible] ) ? true : false

      @filters     = {}
      @attributes  = {}
      @importables = {}
      @exportables = {}
    end

    # Returns a hash (keyed by the biomart 'internal_name' for the filter)
    # of all of the Biomart::Filter objects belonging to this dataset.
    def filters
      fetch_configuration if @filters.empty?
      @filters
    end

    # Returns an array of the filter names (biomart 'internal_name')
    # for this dataset.
    def list_filters
      filters.keys
    end

    # Returns a hash (keyed by the biomart 'internal_name' for the attribute)
    # of all of the Biomart::Attribute objects belonging to this dataset.
    def attributes
      fetch_configuration if @attributes.empty?
      @attributes
    end

    # Returns an array of the attribute names (biomart 'internal_name')
    # for this dataset.
    def list_attributes
      attributes.keys
    end

    # Function to perform a Biomart count. Returns an integer value for
    # the result of the count query.
    #
    # optional arguments:
    #
    # :filters:: hash of key-value pairs (filter => search term)
    #
    # The hash passed in is not modified.
    def count( args={} )
      # merge (not merge!) so the :count flag never leaks into the caller's hash
      count_args = args.merge( :count => "1" )
      result = request( :method => 'post', :url => @url, :query => generate_xml(count_args) )
      result.to_i
    end

    # Function to perform a Biomart search.
    #
    # optional arguments:
    #
    # :filters::          hash of key-value pairs (filter => search term)
    # :attributes::       array of attributes to retrieve
    # :process_results::  true/false - convert search results to object
    #
    # By default will return a hash with the following:
    #
    # :headers::  array of headers
    # :data::     array of arrays containing search results
    #
    # But with the :process_results option will return an array of hashes,
    # where each hash represents a row of results (keyed by the attribute name).
    def search( args={} )
      response = request( :method => 'post', :url => @url, :query => generate_xml(args) )
      result   = process_tsv( args, response )
      result   = conv_results_to_a_of_h( result ) if args[:process_results]
      result
    end

    # Utility function to build the Biomart query XML.
    #
    # Uses the filters/attributes given in +args+; when :filters is absent
    # the dataset's default filters (with their default values) are used,
    # and when :attributes is absent the dataset's default attributes are
    # used. Attributes are left out entirely when args[:count] is set.
    def generate_xml( args={} )
      biomart_xml = ""
      xml = Builder::XmlMarkup.new( :target => biomart_xml, :indent => 2 )

      xml.instruct!
      xml.declare!( :DOCTYPE, :Query )
      xml.Query( :virtualSchemaName => "default", :formatter => "TSV", :header => "0", :uniqueRows => "1", :count => args[:count], :datasetConfigVersion => "0.6" ) {
        xml.Dataset( :name => @name, :interface => "default" ) {

          if args[:filters]
            args[:filters].each do |name,value|
              # several search terms for one filter are sent comma-separated
              value = value.join(",") if value.is_a? Array
              xml.Filter( :name => name, :value => value )
            end
          else
            self.filters.each do |name,filter|
              xml.Filter( :name => name, :value => filter.default_value ) if filter.default
            end
          end

          unless args[:count]
            requested_attribute_names( args ).each do |name|
              xml.Attribute( :name => name )
            end
          end

        }
      }

      biomart_xml
    end

    private

    # Names of the attributes a query works with: the ones listed in
    # args[:attributes] or, failing that, every attribute of the dataset
    # flagged as default. Always returns a new array.
    def requested_attribute_names( args )
      return args[:attributes].map { |name| name } if args[:attributes]

      names = []
      self.attributes.each do |name,attribute|
        names.push(name) if attribute.default
      end
      names
    end

    # Utility function to retrieve and process the configuration
    # xml for a dataset
    def fetch_configuration
      url      = @url + "?type=configuration&dataset=#{@name}"
      document = REXML::Document.new( request( :url => url ) )

      # Top-Level filters...
      REXML::XPath.each( document, '//FilterDescription' ) do |f|
        unless f.attributes["displayType"].eql? "container"
          @filters[ f.attributes["internalName"] ] = Filter.new( f.attributes )
        end
      end

      # Filters nested inside containers...
      REXML::XPath.each( document, '//FilterDescription/Option' ) do |f|
        if f.attributes["displayType"] != nil
          @filters[ f.attributes["internalName"] ] = Filter.new( f.attributes )
        end
      end

      # Attributes are much simpler...
      REXML::XPath.each( document, '//AttributeDescription' ) do |a|
        @attributes[ a.attributes["internalName"] ] = Attribute.new( a.attributes )
      end
    end

    # Utility function to transform the tab-separated data retrieved
    # from the Biomart search query into a ruby object.
    def process_tsv( args, tsv )
      {
        :headers => requested_attribute_names( args ),
        # The separator has to be given as the :col_sep option; a bare
        # positional "\t" is rejected by the CSV library of Ruby 1.9+.
        :data    => CSV.parse( tsv, :col_sep => "\t" )
      }
    end

    # Utility function to quickly convert a search result into an array of hashes
    # (keyed by the attribute name) for easier processing - this is not done by
    # default on all searches as this can cause a large overhead on big data returns.
    def conv_results_to_a_of_h( search_results )
      headers = search_results[:headers]

      search_results[:data].map do |row|
        row_hash = {}
        row.each_index do |index|
          row_hash[ headers[index] ] = row[index]
        end
        row_hash
      end
    end

  end
end
@@ -0,0 +1,15 @@
1
module Biomart
  # A single filter (search field) of a Biomart::Dataset.
  #
  # Built from a hash-like object keyed by the strings "internalName",
  # "displayName", "defaultOn" and "defaultValue".
  class Filter
    attr_reader :name, :display_name, :default, :default_value

    def initialize(args)
      @name          = args["internalName"]
      @display_name  = args["displayName"]
      # Any non-nil, non-false "defaultOn" entry switches the filter on by default.
      # NOTE(review): a literal "false" string would also count as on - confirm
      # against what the server actually sends.
      @default       = !!args["defaultOn"]
      @default_value = args["defaultValue"]
    end
  end
end
@@ -0,0 +1,78 @@
1
module Biomart
  # Class representation for a biomart server.
  # Will contain many Biomart::Database and Biomart::Dataset objects.
  class Server
    include Biomart

    attr_reader :url

    # url:: address of the biomart server; "/martservice" is appended
    #       unless the url already mentions it
    #
    # Raises ArgumentError when no url is given.
    def initialize( url )
      raise ArgumentError, "must pass :url" unless url
      @url = url
      @url = @url + "/martservice" unless @url =~ /martservice/

      @databases = {}
      @datasets  = {}
    end

    # Returns an array of the database names (biomart 'name')
    # for this server.
    def list_databases
      fetch_databases if @databases.empty?
      @databases.keys
    end

    # Returns a hash (keyed by the biomart 'name' for the database)
    # of all of the Biomart::Database objects belonging to this server.
    def databases
      fetch_databases if @databases.empty?
      @databases
    end

    # Returns an array of the dataset names (biomart 'name')
    # for this server.
    def list_datasets
      fetch_datasets if @datasets.empty?
      @datasets.keys
    end

    # Returns a hash (keyed by the biomart 'name' for the dataset)
    # of all of the Biomart::Dataset objects belonging to this server.
    def datasets
      fetch_datasets if @datasets.empty?
      @datasets
    end

    private

    # Utility method to do the webservice call to the biomart server
    # and collate/build the information about the databases. Only
    # registry entries whose "visible" attribute is "1" are kept.
    def fetch_databases
      registry = REXML::Document.new( request( :url => @url + '?type=registry' ) )

      REXML::XPath.each( registry, "//MartURLLocation" ) do |location|
        next unless location.attributes["visible"] == "1"
        @databases[ location.attributes["name"] ] = Database.new( @url, location.attributes )
      end
    end

    # Utility function to collate all of the Biomart::Dataset objects
    # contained within the Biomart::Database objects.
    def fetch_datasets
      databases.each_value do |database|
        @datasets.merge!( database.datasets )
      end
    end

  end
end
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Replaces this process with an irb session that has tab completion and
# the biomart library already loaded.

# On mswin/mingw platforms the launcher is called irb.bat.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/biomart.rb'}"
puts "Loading biomart gem"
exec "#{irb} #{libs} --simple-prompt"
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script from the root of this gem.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading rubygems beforehand.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help switch is dropped before the arguments are handed over.
ARGV.shift if %w[--help -h].include?(ARGV.first)

RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Destroy.new.run(ARGV)
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script from the root of this gem.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading rubygems beforehand.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help switch is dropped before the arguments are handed over.
ARGV.shift if %w[--help -h].include?(ARGV.first)

RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,15 @@
1
# Rake task that runs the project's shoulda tests.

# Try a plain require first; fall back to loading rubygems beforehand
# (unless NO_RUBYGEMS is set in the environment).
begin
  require 'shoulda'
rescue LoadError
  require 'rubygems' unless ENV['NO_RUBYGEMS']
  require 'shoulda'
end

require 'rake/testtask'

desc "Run the shoulda tests under /test"
Rake::TestTask.new do |t|
  t.libs << "test"
  # The gem ships its tests as test/test_*.rb (see Manifest.txt); the
  # helper matches that pattern but holds no tests, so it is left out.
  t.test_files = FileList['test/**/test_*.rb'].exclude('test/test_helper.rb')
  t.verbose = true
end
@@ -0,0 +1,145 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Tests for the Biomart library. They talk to the live server at
# www.sanger.ac.uk, so they need network access.
class BiomartTest < Test::Unit::TestCase
  # Two separate server objects for the same address: the list_* calls are
  # made on one and the hash accessors on the other (presumably so each
  # code path has to fetch for itself).
  def setup
    @htgt     = Biomart::Server.new('http://www.sanger.ac.uk/htgt/biomart')
    @htgt_alt = Biomart::Server.new('http://www.sanger.ac.uk/htgt/biomart')
  end

  context "A Biomart::Server instance" do
    should "have databases" do
      assert(@htgt_alt.list_databases.is_a?(Array), "Biomart::Server.list_databases is not returning an array.")
      assert(@htgt_alt.list_databases.include?("htgt"), "Biomart::Server datbase parsing is off - htgt is not in htgt!")
      assert(@htgt.databases["htgt"].is_a?(Biomart::Database), "Biomart::Server is not creating Biomart::Database objects.")
    end

    should "have datasets" do
      assert(@htgt_alt.list_datasets.is_a?(Array), "Biomart::Server.list_datasets is not returning an array.")
      assert(@htgt_alt.list_datasets.include?("htgt_targ"), "Biomart::Server dataset parsing is off - htgt_targ is not in htgt!")
      assert(@htgt.datasets["htgt_targ"].is_a?(Biomart::Dataset), "Biomart::Server is not creating Biomart::Dataset objects.")
    end
  end

  context "A Biomart::Database instance" do
    setup do
      @htgt_database = @htgt.databases["htgt"]
    end

    should "have basic metadata" do
      assert(@htgt_database.display_name, "Biomart::Database does not have a 'display name'.")
      assert(@htgt_database.name, "Biomart::Database does not have a 'name'.")
      assert(!@htgt_database.visible.nil?, "Biomart::Database does not have a 'visible' flag.")
    end

    should "have datasets" do
      assert(@htgt_database.list_datasets.is_a?(Array), "Biomart::Database.list_datasets is not returning an array.")
      assert(@htgt_database.list_datasets.include?("htgt_targ"), "Biomart::Database dataset parsing is off - htgt_targ is not in htgt!")
      assert(@htgt_database.datasets["htgt_targ"].is_a?(Biomart::Dataset), "Biomart::Database is not creating Biomart::Dataset objects.")
    end
  end

  context "A Biomart::Dataset instance" do
    setup do
      @htgt_targ = @htgt.datasets["htgt_targ"]
      @htgt_trap = @htgt.datasets["htgt_trap"]
      @kermits   = @htgt.datasets["kermits"]
    end

    should "have basic metadata" do
      assert(@htgt_targ.display_name, "Biomart::Dataset does not have a 'display name'.")
      assert(@htgt_targ.name, "Biomart::Dataset does not have a 'name'.")
      assert(!@htgt_targ.visible.nil?, "Biomart::Dataset does not have a 'visible' flag.")
    end

    should "have filters" do
      assert(@htgt_targ.list_filters.is_a?(Array), "Biomart::Dataset.list_filters is not returning an array.")
      assert(@htgt_targ.list_filters.include?("ensembl_gene_id"), "Biomart::Dataset filter parsing is off - ensembl_gene_id is not in htgt_targ!")
      assert(@kermits.filters["ensembl_gene_id"].is_a?(Biomart::Filter), "Biomart::Dataset is not creating Biomart::Filter objects.")
    end

    should "have attributes" do
      assert(@htgt_targ.list_attributes.is_a?(Array), "Biomart::Dataset.list_attributes is not returning an array.")
      assert(@htgt_targ.list_attributes.include?("ensembl_gene_id"), "Biomart::Dataset attribute parsing is off - ensembl_gene_id is not in htgt_targ!")
      assert(@kermits.attributes["ensembl_gene_id"].is_a?(Biomart::Attribute), "Biomart::Dataset is not creating Biomart::Attribute objects.")
    end

    should "perform count queries" do
      # No filters at all.
      total = @htgt_targ.count()
      assert(total.is_a?(Integer), "Biomart::Dataset.count is not returning integers.")
      assert(total > 0, "Biomart::Dataset.count is returning zero - this is wrong!")

      # One filter with a single value.
      eucomm_total = @htgt_targ.count( :filters => { "is_eucomm" => "1" } )
      assert(eucomm_total.is_a?(Integer), "Biomart::Dataset.count (with single filter) is not returning integers.")
      assert(eucomm_total > 0, "Biomart::Dataset.count (with single filter) is returning zero - this is wrong!")

      # One filter with a group of values.
      symbols_total = @htgt_targ.count( :filters => { "marker_symbol" => ["Cbx1","Cbx7","Art4"] } )
      assert(symbols_total.is_a?(Integer), "Biomart::Dataset.count (with single filter, group value) is not returning integers.")
      assert(symbols_total > 0, "Biomart::Dataset.count (with single filter, group value) is returning zero - this is wrong!")
    end

    should "perform search queries" do
      # Default filters and attributes, raw result.
      plain = @htgt_trap.search()
      assert(plain.is_a?(Hash), "Biomart::Dataset.search (no options) is not returning a hash.")
      assert(plain[:data].is_a?(Array), "Biomart::Dataset.search[:data] (no options) is not returning an array.")

      # Explicit filter, default attributes, processed result.
      filtered = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :process_results => true )
      assert(filtered.is_a?(Array), "Biomart::Dataset.search (filters defined with processing) is not returning an array.")
      assert(filtered.first.is_a?(Hash), "Biomart::Dataset.search (filters defined with processing) is not returning an array of hashes.")
      assert(filtered.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters defined with processing) is not returning the correct info.")

      # Explicit filter and attributes, processed result.
      narrowed = @htgt_targ.search( :filters => { "marker_symbol" => "Cbx1" }, :attributes => ["marker_symbol","ensembl_gene_id"], :process_results => true )
      assert(narrowed.is_a?(Array), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array.")
      assert(narrowed.first.is_a?(Hash), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes.")
      assert(narrowed.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info.")
    end
  end

  context "The Biomart module" do
    setup do
      @not_biomart = Biomart::Server.new('http://www.sanger.ac.uk')
      @htgt_targ   = @htgt.datasets["htgt_targ"]
      @bad_dataset = Biomart::Dataset.new( "http://www.sanger.ac.uk/htgt/biomart", { :name => "wibble" } )
    end

    should "handle user/configuration errors (i.e. incorrect URLs etc)" do
      http_error = rescued( Biomart::HTTPError ) { @not_biomart.list_databases }

      assert(http_error.is_a?(Biomart::HTTPError), "Biomart.request is not processing HTTP errors correctly.")
    end

    should "handle biomart server errors gracefully" do
      filter_error    = rescued( Biomart::FilterError )    { @htgt_targ.count( :filters => { "wibbleblibbleblip" => "1" } ) }
      attribute_error = rescued( Biomart::AttributeError ) { @htgt_targ.search( :attributes => ["wibbleblibbleblip"] ) }
      dataset_error   = rescued( Biomart::DatasetError )   { @bad_dataset.count() }
      general_error   = rescued( Biomart::BiomartError )   { @bad_dataset.count() }

      assert(filter_error.is_a?(Biomart::FilterError), "Biomart.request is not handling Biomart filter errors correctly.")
      assert(attribute_error.is_a?(Biomart::AttributeError), "Biomart.request is not handling Biomart attribute errors correctly.")
      assert(dataset_error.is_a?(Biomart::DatasetError), "Biomart.request is not handling Biomart dataset errors correctly.")
      assert(general_error.is_a?(Biomart::BiomartError), "Biomart.request is not handling general Biomart errors correctly.")
    end
  end

  private

  # Runs the block and hands back the exception of the given class that it
  # raised, or nil when nothing was raised. Other exceptions pass through.
  def rescued( error_class )
    yield
    nil
  rescue error_class => e
    e
  end
end
@@ -0,0 +1,9 @@
1
# Shared set-up for the test files: loads shoulda and the library under test.

# Try a plain require first; fall back to loading rubygems beforehand
# (unless NO_RUBYGEMS is set in the environment).
begin
  require 'shoulda'
rescue LoadError
  require 'rubygems' unless ENV['NO_RUBYGEMS']
  require 'shoulda'
end

# Put this checkout's lib directory at the front of the load path.
$:.unshift(File.dirname(__FILE__) + '/../lib')
# The file is lib/biomart.rb; the name must match its case exactly or the
# require fails on case-sensitive filesystems.
require 'biomart'
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: biomart
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Darren Oakley
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-08-29 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: builder
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: thoughtbot-shoulda
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: hoe
37
+ type: :development
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 2.3.3
44
+ version:
45
+ description: A ruby API for interacting with Biomart services.
46
+ email:
47
+ - daz.oakley@gmail.com
48
+ executables: []
49
+
50
+ extensions: []
51
+
52
+ extra_rdoc_files:
53
+ - History.txt
54
+ - Manifest.txt
55
+ - README.rdoc
56
+ files:
57
+ - History.txt
58
+ - Manifest.txt
59
+ - README.rdoc
60
+ - Rakefile
61
+ - biomart.gemspec
62
+ - lib/biomart.rb
63
+ - lib/biomart/attribute.rb
64
+ - lib/biomart/database.rb
65
+ - lib/biomart/dataset.rb
66
+ - lib/biomart/filter.rb
67
+ - lib/biomart/server.rb
68
+ - script/console
69
+ - script/destroy
70
+ - script/generate
71
+ - tasks/shoulda.rake
72
+ - test/test_biomart.rb
73
+ - test/test_helper.rb
74
+ has_rdoc: true
75
+ homepage: http://github.com/dazoakley/biomart
76
+ licenses: []
77
+
78
+ post_install_message:
79
+ rdoc_options:
80
+ - --main
81
+ - README.rdoc
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: "0"
89
+ version:
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: "0"
95
+ version:
96
+ requirements: []
97
+
98
+ rubyforge_project: biomart
99
+ rubygems_version: 1.3.5
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: A ruby API for interacting with Biomart services.
103
+ test_files:
104
+ - test/test_biomart.rb
105
+ - test/test_helper.rb