biomart 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -5,3 +5,16 @@
5
5
  * Basic gathering of information about a Biomart server.
6
6
  * Counting functionality.
7
7
  * Basic searching of a single dataset.
8
+
9
+ === 0.1.1 2009-10-21
10
+
11
+ * 1 minor enhancement:
12
+ * Addition of an .alive? function to the Server and Dataset classes
13
+ to allow a user to ping a biomart server to make sure it is online
14
+ and functioning as expected.
15
+
16
+ === 0.1.2 2009-10-29
17
+
18
+ * 1 major bugfix:
19
+ * Added in code to handle poorly formatted tab-separated data
20
+ coming back from a biomart query.
data/biomart.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{biomart}
5
- s.version = "0.1.1"
5
+ s.version = "0.1.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Darren Oakley"]
9
- s.date = %q{2009-10-21}
9
+ s.date = %q{2009-10-29}
10
10
  s.description = %q{A ruby API for interacting with Biomart services.}
11
11
  s.email = ["daz.oakley@gmail.com"]
12
12
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
data/lib/biomart.rb CHANGED
@@ -7,7 +7,7 @@ require "rubygems"
7
7
  require "builder"
8
8
 
9
9
  module Biomart
10
- VERSION = "0.1.1"
10
+ VERSION = "0.1.2"
11
11
 
12
12
  # This is the base Biomart error/exception class. Rescue it if
13
13
  # you want to catch any exceptions that this code might raise.
@@ -175,7 +175,8 @@ module Biomart
175
175
  # Utility function to transform the tab-separated data retrieved
176
176
  # from the Biomart search query into a ruby object.
177
177
  def process_tsv( args, tsv )
178
- headers = []
178
+ headers = []
179
+ parsed_data = []
179
180
 
180
181
  if args[:attributes]
181
182
  args[:attributes].each do |attribute|
@@ -189,12 +190,56 @@ module Biomart
189
190
  end
190
191
  end
191
192
 
193
+ begin
194
+ parsed_data = CSV.parse( tsv, "\t" )
195
+ rescue CSV::IllegalFormatError => e
196
+ parsed_data = parse_tsv_line_by_line( headers.size, tsv )
197
+ end
198
+
192
199
  return {
193
200
  :headers => headers,
194
- :data => CSV.parse( tsv, "\t" )
201
+ :data => parsed_data
195
202
  }
196
203
  end
197
-
204
+
205
+ # Utility function to process TSV formatted data that raises errors. (Biomart
206
+ # has a habit of serving out this...) First attempts to use the CSV module's
207
+ # 'parse_line' function to read in the data, if that fails, tries to use split
208
+ # to recover the data.
209
+ def parse_tsv_line_by_line( expected_row_size, tsv )
210
+ parsed_data = []
211
+
212
+ data_by_line = tsv.split("\n")
213
+ data_by_line.each do |line|
214
+ elements = CSV::parse_line( line, "\t" )
215
+
216
+ if elements.size == 0
217
+ # This is a bad line (causing the above Exception), try and use split to recover.
218
+ # Also add an empty value as split will miss the final value...
219
+ elements = line.split("\t")
220
+ elements.push(nil)
221
+
222
+ # Substitute blank strings for nils
223
+ elements.map! do |elem|
224
+ if elem === ""
225
+ nil
226
+ else
227
+ elem
228
+ end
229
+ end
230
+
231
+ # Add a safety clause...
232
+ if elements.size === expected_row_size
233
+ parsed_data.push(elements)
234
+ end
235
+ else
236
+ parsed_data.push(elements)
237
+ end
238
+ end
239
+
240
+ return parsed_data
241
+ end
242
+
198
243
  # Utility function to quickly convert a search result into an array of hashes
199
244
  # (keyed by the attribute name) for easier processing - this is not done by
200
245
  # default on all searches as this can cause a large overhead on big data returns.
data/test/test_biomart.rb CHANGED
@@ -92,6 +92,25 @@ class BiomartTest < Test::Unit::TestCase
92
92
  assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
93
93
  assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
94
94
  end
95
+
96
+ should "handle search queries that will generate poorly formatted TSV data" do
97
+ search = @htgt_targ.search(
98
+ :filters => { "mgi_accession_id" => [ "MGI:1921569", "MGI:1913402", "MGI:1913300" ] },
99
+ :attributes => [
100
+ "is_eucomm", "is_komp_csd", "is_komp_regeneron", "is_norcomm",
101
+ "is_mgp", "mgi_accession_id", "marker_symbol", "ensembl_gene_id",
102
+ "status", "status_code", "status_type", "status_description",
103
+ "status_sequence", "pipeline_stage", "htgt_project_id", "bac",
104
+ "design_id", "design_plate", "design_well", "intvec_plate",
105
+ "intvec_well", "intvec_distribute", "targvec_plate", "targvec_well",
106
+ "targvec_distribute", "backbone", "cassette", "allele_name",
107
+ "escell_clone_name", "escell_distribute", "es_cell_line", "colonies_picked",
108
+ "is_latest_for_gene", "is_targeted_non_cond"
109
+ ]
110
+ )
111
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
112
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
113
+ end
95
114
  end
96
115
 
97
116
  context "The Biomart module" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biomart
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Oakley
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-21 00:00:00 +01:00
12
+ date: 2009-10-29 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency