biomart 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -5,3 +5,16 @@
5
5
  * Basic gathering of information about a Biomart server.
6
6
  * Counting functionality.
7
7
  * Basic searching of a single dataset.
8
+
9
+ === 0.1.1 2009-10-21
10
+
11
+ * 1 minor enhancement:
12
+ * Addition of an .alive? function to the Server and Dataset classes
13
+ to allow a user to ping a biomart server to make sure it is online
14
+ and functioning as expected.
15
+
16
+ === 0.1.2 2009-10-29
17
+
18
+ * 1 major bugfix:
19
+ * Added in code to handle poorly formatted tab-separated data
20
+ coming back from a biomart query.
data/biomart.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{biomart}
5
- s.version = "0.1.1"
5
+ s.version = "0.1.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Darren Oakley"]
9
- s.date = %q{2009-10-21}
9
+ s.date = %q{2009-10-29}
10
10
  s.description = %q{A ruby API for interacting with Biomart services.}
11
11
  s.email = ["daz.oakley@gmail.com"]
12
12
  s.extra_rdoc_files = ["History.txt", "Manifest.txt", "README.rdoc"]
data/lib/biomart.rb CHANGED
@@ -7,7 +7,7 @@ require "rubygems"
7
7
  require "builder"
8
8
 
9
9
  module Biomart
10
- VERSION = "0.1.1"
10
+ VERSION = "0.1.2"
11
11
 
12
12
  # This is the base Biomart error/exception class. Rescue it if
13
13
  # you want to catch any exceptions that this code might raise.
@@ -175,7 +175,8 @@ module Biomart
175
175
  # Utility function to transform the tab-separated data retrieved
176
176
  # from the Biomart search query into a ruby object.
177
177
  def process_tsv( args, tsv )
178
- headers = []
178
+ headers = []
179
+ parsed_data = []
179
180
 
180
181
  if args[:attributes]
181
182
  args[:attributes].each do |attribute|
@@ -189,12 +190,56 @@ module Biomart
189
190
  end
190
191
  end
191
192
 
193
+ begin
194
+ parsed_data = CSV.parse( tsv, "\t" )
195
+ rescue CSV::IllegalFormatError => e
196
+ parsed_data = parse_tsv_line_by_line( headers.size, tsv )
197
+ end
198
+
192
199
  return {
193
200
  :headers => headers,
194
- :data => CSV.parse( tsv, "\t" )
201
+ :data => parsed_data
195
202
  }
196
203
  end
197
-
204
+
205
+ # Utility function to process TSV formatted data that raises errors. (Biomart
206
+ # has a habit of serving out this...) First attempts to use the CSV module's
207
+ # 'parse_line' function to read in the data; if that fails, it tries to use split
208
+ # to recover the data.
209
+ def parse_tsv_line_by_line( expected_row_size, tsv )
210
+ parsed_data = []
211
+
212
+ data_by_line = tsv.split("\n")
213
+ data_by_line.each do |line|
214
+ elements = CSV::parse_line( line, "\t" )
215
+
216
+ if elements.size == 0
217
+ # This is a bad line (causing the above Exception), try and use split to recover.
218
+ # Also add an empty value as split will miss the final value...
219
+ elements = line.split("\t")
220
+ elements.push(nil)
221
+
222
+ # Substitute blank strings for nils
223
+ elements.map! do |elem|
224
+ if elem === ""
225
+ nil
226
+ else
227
+ elem
228
+ end
229
+ end
230
+
231
+ # Add a safety clause...
232
+ if elements.size === expected_row_size
233
+ parsed_data.push(elements)
234
+ end
235
+ else
236
+ parsed_data.push(elements)
237
+ end
238
+ end
239
+
240
+ return parsed_data
241
+ end
242
+
198
243
  # Utility function to quickly convert a search result into an array of hashes
199
244
  # (keyed by the attribute name) for easier processing - this is not done by
200
245
  # default on all searches as this can cause a large overhead on big data returns.
data/test/test_biomart.rb CHANGED
@@ -92,6 +92,25 @@ class BiomartTest < Test::Unit::TestCase
92
92
  assert( search2.first.is_a?( Hash ), "Biomart::Dataset.search (filters and attributes defined with processing) is not returning an array of hashes." )
93
93
  assert( search2.first["marker_symbol"] == "Cbx1", "Biomart::Dataset.search (filters and attributes defined with processing) is not returning the correct info." )
94
94
  end
95
+
96
+ should "handle search queries that will generate poorly formatted TSV data" do
97
+ search = @htgt_targ.search(
98
+ :filters => { "mgi_accession_id" => [ "MGI:1921569", "MGI:1913402", "MGI:1913300" ] },
99
+ :attributes => [
100
+ "is_eucomm", "is_komp_csd", "is_komp_regeneron", "is_norcomm",
101
+ "is_mgp", "mgi_accession_id", "marker_symbol", "ensembl_gene_id",
102
+ "status", "status_code", "status_type", "status_description",
103
+ "status_sequence", "pipeline_stage", "htgt_project_id", "bac",
104
+ "design_id", "design_plate", "design_well", "intvec_plate",
105
+ "intvec_well", "intvec_distribute", "targvec_plate", "targvec_well",
106
+ "targvec_distribute", "backbone", "cassette", "allele_name",
107
+ "escell_clone_name", "escell_distribute", "es_cell_line", "colonies_picked",
108
+ "is_latest_for_gene", "is_targeted_non_cond"
109
+ ]
110
+ )
111
+ assert( search.is_a?( Hash ), "Biomart::Dataset.search (no options) is not returning a hash." )
112
+ assert( search[:data].is_a?( Array ), "Biomart::Dataset.search[:data] (no options) is not returning an array." )
113
+ end
95
114
  end
96
115
 
97
116
  context "The Biomart module" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biomart
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Oakley
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-21 00:00:00 +01:00
12
+ date: 2009-10-29 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency