biomart 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +13 -0
- data/biomart.gemspec +2 -2
- data/lib/biomart.rb +1 -1
- data/lib/biomart/dataset.rb +48 -3
- data/test/test_biomart.rb +19 -0
- metadata +2 -2
data/History.txt
CHANGED
@@ -5,3 +5,16 @@
|
|
5
5
|
* Basic gathering of information about a Biomart server.
|
6
6
|
* Counting functionality.
|
7
7
|
* Basic searching of a single dataset.
|
8
|
+
|
9
|
+
=== 0.1.1 2009-10-21
|
10
|
+
|
11
|
+
* 1 minor enhancement:
|
12
|
+
* Addition of an .alive? function to the Server and Dataset classes
|
13
|
+
to allow a user to ping a biomart server to make sure it is online
|
14
|
+
and functioning as expected.
|
15
|
+
|
16
|
+
=== 0.1.2 2009-10-29
|
17
|
+
|
18
|
+
* 1 major bugfix:
|
19
|
+
* Added in code to handle poorly formatted tab-separated data
|
20
|
+
coming back from a biomart query.
|
data/biomart.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{biomart}
|
5
|
-
s.version = "0.1.1"
|
5
|
+
s.version = "0.1.2"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Darren Oakley"]
|
9
|
-
s.date = %q{2009-10-21}
|
9
|
+
s.date = %q{2009-10-29}
|
10
10
|
s.description = %q{A ruby API for interacting with Biomart services.}
|
11
11
|
s.email = ["daz.oakley@gmail.com"]
|
12
12
|
s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
|
data/lib/biomart.rb
CHANGED
data/lib/biomart/dataset.rb
CHANGED
@@ -175,7 +175,8 @@ module Biomart
|
|
175
175
|
# Utility function to transform the tab-separated data retrieved
|
176
176
|
# from the Biomart search query into a ruby object.
|
177
177
|
def process_tsv( args, tsv )
|
178
|
-
headers
|
178
|
+
headers = []
|
179
|
+
parsed_data = []
|
179
180
|
|
180
181
|
if args[:attributes]
|
181
182
|
args[:attributes].each do |attribute|
|
@@ -189,12 +190,56 @@ module Biomart
|
|
189
190
|
end
|
190
191
|
end
|
191
192
|
|
193
|
+
begin
|
194
|
+
parsed_data = CSV.parse( tsv, "\t" )
|
195
|
+
rescue CSV::IllegalFormatError => e
|
196
|
+
parsed_data = parse_tsv_line_by_line( headers.size, tsv )
|
197
|
+
end
|
198
|
+
|
192
199
|
return {
|
193
200
|
:headers => headers,
|
194
|
-
:data =>
|
201
|
+
:data => parsed_data
|
195
202
|
}
|
196
203
|
end
|
197
|
-
|
204
|
+
|
205
|
+
# Utility function to process TSV formatted data that raises errors. (Biomart
|
206
|
+
# has a habit of serving out this...) First attempts to use the CSV module's
|
207
|
+
# 'parse_line' function to read in the data, if that fails, tries to use split
|
208
|
+
# to recover the data.
|
209
|
+
def parse_tsv_line_by_line( expected_row_size, tsv )
|
210
|
+
parsed_data = []
|
211
|
+
|
212
|
+
data_by_line = tsv.split("\n")
|
213
|
+
data_by_line.each do |line|
|
214
|
+
elements = CSV::parse_line( line, "\t" )
|
215
|
+
|
216
|
+
if elements.size == 0
|
217
|
+
# This is a bad line (causing the above Exception), try and use split to recover.
|
218
|
+
# Also add an empty value as split will miss the final value...
|
219
|
+
elements = line.split("\t")
|
220
|
+
elements.push(nil)
|
221
|
+
|
222
|
+
# Substitute blank strings for nils
|
223
|
+
elements.map! do |elem|
|
224
|
+
if elem === ""
|
225
|
+
nil
|
226
|
+
else
|
227
|
+
elem
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
# Add a safety clause...
|
232
|
+
if elements.size === expected_row_size
|
233
|
+
parsed_data.push(elements)
|
234
|
+
end
|
235
|
+
else
|
236
|
+
parsed_data.push(elements)
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
return parsed_data
|
241
|
+
end
|
242
|
+
|
198
243
|
# Utility function to quickly convert a search result into an array of hashes
|
199
244
|
# (keyed by the attribute name) for easier processing - this is not done by
|
200
245
|
# default on all searches as this can cause a large overhead on big data returns.
|
data/test/test_biomart.rb
CHANGED
@@ -92,6 +92,25 @@ class BiomartTest < Test::Unit::TestCase
|
|
92
92
|
assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
|
93
93
|
assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
|
94
94
|
end
|
95
|
+
|
96
|
+
should "handle search queries that will generate poorly formatted TSV data" do
|
97
|
+
search = @htgt_targ.search(
|
98
|
+
:filters => { "mgi_accession_id" => [ "MGI:1921569", "MGI:1913402", "MGI:1913300" ] },
|
99
|
+
:attributes => [
|
100
|
+
"is_eucomm", "is_komp_csd", "is_komp_regeneron", "is_norcomm",
|
101
|
+
"is_mgp", "mgi_accession_id", "marker_symbol", "ensembl_gene_id",
|
102
|
+
"status", "status_code", "status_type", "status_description",
|
103
|
+
"status_sequence", "pipeline_stage", "htgt_project_id", "bac",
|
104
|
+
"design_id", "design_plate", "design_well", "intvec_plate",
|
105
|
+
"intvec_well", "intvec_distribute", "targvec_plate", "targvec_well",
|
106
|
+
"targvec_distribute", "backbone", "cassette", "allele_name",
|
107
|
+
"escell_clone_name", "escell_distribute", "es_cell_line", "colonies_picked",
|
108
|
+
"is_latest_for_gene", "is_targeted_non_cond"
|
109
|
+
]
|
110
|
+
)
|
111
|
+
assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
|
112
|
+
assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
|
113
|
+
end
|
95
114
|
end
|
96
115
|
|
97
116
|
context "The Biomart module" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biomart
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Oakley
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-10-
|
12
|
+
date: 2009-10-29 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|