obo_parser 0.3.6 → 0.3.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/VERSION +1 -1
  2. data/lib/utilities.rb +212 -138
  3. data/obo_parser.gemspec +2 -2
  4. metadata +4 -4
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.6
1
+ 0.3.7
@@ -18,7 +18,7 @@ module OboParser::Utilities
18
18
  #
19
19
  # @param [Integer] cutoff only Term ids with > cutoff labels will be reported
20
20
  # @param [Array] files an Array of read files
21
- # @return [String] the transation in tab delimted format
21
+ # @return [String] the translation in tab delimited format
22
22
  def self.dump_comparison_by_id(cutoff = 0, files = [])
23
23
  return '' if files.size < 1
24
24
 
@@ -47,7 +47,9 @@ module OboParser::Utilities
47
47
  end
48
48
  end
49
49
 
50
+
50
51
  # Returns all labels found in all passed ontologies. Does not yet include synonyms.
52
+ # Caution: strips adult, embryonic, larval from labels (comment to remove)
51
53
  #
52
54
  #== Example use
53
55
  # of1 = File.read('fly_anatomy.obo')
@@ -64,8 +66,11 @@ module OboParser::Utilities
64
66
  files.each do |f|
65
67
  o = parse_obo_file(f)
66
68
  o.term_hash.keys.each do |k|
69
+
70
+ # TODO: make this optional
67
71
  tmp = k.gsub(/adult/, "").strip
68
72
  tmp = k.gsub(/embryonic\/larval/, "").strip
73
+
69
74
  if comparison[tmp]
70
75
  comparison[tmp] += 1
71
76
  else
@@ -86,10 +91,9 @@ module OboParser::Utilities
86
91
 
87
92
  end
88
93
 
94
+ #== Two column correspondences and translation tools
89
95
 
90
- #== Two column translation tools
91
-
92
- HOMOLONTO_HEADER = %{
96
+ HOMOLONTO_HEADER = %{
93
97
  format-version: 1.2
94
98
  auto-generated-by: obo_parser
95
99
  default-namespace: fix_me
@@ -212,31 +216,129 @@ is_anti_symmetric: true
212
216
  true
213
217
  end
214
218
 
215
- # Takes a two column input file, references it to two ontologies, and returns a hash
219
+
220
+ # Takes a two column input file, references it to two ontologies, and returns a report
221
+ # that identifies data pairs that have parents who are also a data pair given a
222
+ # provided property/relation type.
216
223
  #
217
224
  #== Example use
218
225
  # file = File.read('HAO_TGMA_list.txt')
219
226
  # col1_obo = File.read('hao.obo')
220
227
  # col2_obo = File.read('tgma.obo')
228
+ #
229
+ # foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
230
+ #
231
+ # puts "-- NO (#{foo[:no].size})\n"
232
+ # puts foo[:no].join("\n")
233
+ # puts "-- YES (#{foo[:yes].size})\n"
234
+ # puts foo[:yes].join("\n")
235
+ #
236
+ # @param [Hash] options options.
237
+ # @param [Symbol] data the two column data file.
238
+ # @param [Symbol] col1_obo the OBO file referenced in the first column
239
+ # @param [Symbol] col2_obo the OBO file referenced in the second column
240
+ # @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
241
+ # @return [Hash] a hash of {:yes => [], :no => [], :unplaced => []}
242
+ def parents(options = {})
243
+ opt = {
244
+ :data => nil,
245
+ :col1_obo => nil,
246
+ :col2_obo => nil,
247
+ :property => nil
248
+ }.merge!(options)
249
+
250
+ return false if opt[:property].nil?
251
+ c1obo = parse_obo_file(opt[:col1_obo])
252
+ c2obo = parse_obo_file(opt[:col2_obo])
253
+
254
+ result = {:yes => [], :no => [], :unplaced => []}
255
+ # update
256
+ array = OboParser::Utilities.arrayify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
257
+
258
+ # index the pairs
259
+ index = array.inject({}){|hsh, i| hsh.update({"#{i[0]}#{i[1]}" => i})}
260
+
261
+ obo1_hash = c1obo.id_index
262
+ obo2_hash = c2obo.id_index
263
+
264
+ array.each do |k|
265
+ a = k[0]
266
+ b = k[1]
267
+
268
+ ids_1 = []
269
+ ids_2 = []
270
+
271
+ if !obo1_hash[a]
272
+ puts "can't find #{k}\n"
273
+ next
274
+ end
275
+
276
+ if !obo2_hash[b]
277
+ puts "can't find #{k}\n"
278
+ next
279
+ end
280
+
281
+ obo1_hash[a].relationships.each do |rel, id|
282
+ if rel == opt[:property]
283
+ ids_1.push id
284
+ end
285
+ end
286
+
287
+ obo2_hash[b].relationships.each do |rel, id|
288
+ if rel == opt[:property]
289
+ ids_2.push id
290
+ end
291
+ end
292
+
293
+ unplaced = true
294
+
295
+ ids_1.each do |c|
296
+ ids_2.each do |d|
297
+ t = "#{a} -> #{b}"
298
+ if index["#{c}#{d}"] # think this is right hash[c] == d
299
+ result[:yes].push(t)
300
+ unplaced = false
301
+ next # don't add again after we find a hit
302
+ else
303
+ result[:no].push(t)
304
+ unplaced = false
305
+ end
306
+ end
307
+ end
308
+ result[:unplaced]
309
+
310
+ end
311
+
312
+ result
313
+ end
314
+
315
+
316
+ # Takes a two column input file, references it to two ontologies, and returns an array of [[id1, id2], [id3,idn] ...].
317
+ #
318
+ #== Example use
319
+ # file = File.read('HAO_TGMA_list.txt')
320
+ # col1_obo = File.read('hao.obo')
321
+ # col2_obo = File.read('tgma.obo')
221
322
  #
222
- # OboParser::Utilities.hashify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
323
+ # OboParser::Utilities.arrayify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
223
324
  #
224
325
  # @param [Hash] options options.
225
326
  # @param [Symbol] data the two column data file.
226
327
  # @param [Symbol] colo1_obo the OBO file referenced in the first column
227
328
  # @param [Symbol] colo2_obo the OBO file referenced in the second column
228
- # @return [Hash] a hash of {id string => id string}
229
- def self.hashify_pairs(options = {})
329
+ # @return [Array] an array of [[id string, id string], ...]
330
+ def self.arrayify_pairs(options = {})
230
331
  opt = {
231
332
  :data => nil,
232
333
  :col1_obo => nil,
233
334
  :col2_obo => nil,
335
+ :index_start => 0
234
336
  }.merge!(options)
235
-
337
+
236
338
  c1obo = parse_obo_file(opt[:col1_obo])
237
339
  c2obo = parse_obo_file(opt[:col2_obo])
238
340
 
239
- hash = Hash.new
341
+ array = Array.new
240
342
 
241
343
  i = opt[:index_start]
242
344
  v1 = nil # a label like 'head'
@@ -246,7 +348,7 @@ is_anti_symmetric: true
246
348
 
247
349
  opt[:data].split(/\n/).each do |row|
248
350
  i += 1
249
- c1, c2 = row.split(/\t/).map(&:strip)
351
+ c1, c2 = row.split(/\t/).map(&:strip)
250
352
 
251
353
  if c1.nil? || c2.nil?
252
354
  next
@@ -263,11 +365,78 @@ is_anti_symmetric: true
263
365
  else
264
366
  v2 = c2obo.term_hash[c2]
265
367
  end
266
-
267
- hash.merge!(c1 => c2)
268
-
368
+
369
+ array << [c1,c2]
370
+
371
+ end
372
+ return array
373
+ end
374
+
375
+
376
+ # Provides a set comparison of correspondences between two sets of correspondences. Finds correspondences that are shared, in left only, or in right only.
377
+ #
378
+ #== Example use
379
+ # data1 = File.read('input/hao_fbbt_vetted_list2.txt')
380
+ # data2 = File.read('input/hao_fbbt_from_bioportal.txt')
381
+ # col1_obo = File.read('hao.obo')
382
+ # col2_obo = File.read('tgma.obo')
383
+ #
384
+ # OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :text ) # => a text report
385
+ # or
386
+ # foo = OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :array )
387
+ #
388
+ #== "Report" types
389
+ # Use :return to return different values
390
+ # :array - Returns an Array
391
+ # :text - Generates 4 column report (id, label, id, label) to STDOUT
392
+ #
393
+ # @param [Hash] options options.
394
+ # @param [Symbol] data1 two columns (tab) with a correspondence between ontologies 1 and 2
395
+ # @param [Symbol] data2 as data1, a second set of correspondences
396
+ # @param [Symbol] col1_obo the OBO file corresponding to the first column of data1 and data2
397
+ # @param [Symbol] col2_obo the OBO file corresponding to the second column of data1 and data2
398
+ # @param [Symbol] return the value to return
399
+ # @return [String | Array]
400
+ def self.set_comparison(options = {})
401
+ opt = {
402
+ :data1 => nil,
403
+ :data2 => nil,
404
+ :col1_obo => nil,
405
+ :col2_obo => nil,
406
+ :return => :array # also :text
407
+ }.merge!(options)
408
+
409
+ c1obo = parse_obo_file(opt[:col1_obo])
410
+ c2obo = parse_obo_file(opt[:col2_obo])
411
+
412
+ th1 = c1obo.id_hash
413
+ th2 = c2obo.id_hash
414
+
415
+ array1 = OboParser::Utilities.arrayify_pairs(:data => opt[:data1], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
416
+ array2 = OboParser::Utilities.arrayify_pairs(:data => opt[:data2], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
417
+
418
+ index1 = array1.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
419
+ index2 = array2.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
420
+
421
+ result = {:in_left => [], :in_right => [], :in_both => []}
422
+
423
+ result[:in_both] = (index1.keys & index2.keys).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
424
+ result[:in_left] = ((index1.keys | index2.keys) - index2.keys ).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
425
+ result[:in_right] = ((index1.keys | index2.keys) - index1.keys ).collect{|k| [ index2[k][0], th1[index2[k][0]], index2[k][1], th2[index2[k][1]] , ] }
426
+
427
+ case opt[:return]
428
+ when :array
429
+ return result
430
+ when :text
431
+ [:in_both, :in_left, :in_right].each do |t|
432
+ puts "--- #{t.to_s} (#{result[t].size}) ---"
433
+ result[t].each do |r|
434
+ puts r.join("\t")
435
+ end
436
+ end
437
+ else
438
+ return nil
269
439
  end
270
- return hash
271
440
  end
272
441
 
273
442
 
@@ -290,143 +459,48 @@ is_anti_symmetric: true
290
459
  end
291
460
 
292
461
 
293
- # Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
294
- #
295
- #== Example use
296
- # OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
297
- #
298
- # @param [Symbol] ontologies a Hash of #read files as values, keys as working names
299
- # @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
300
- # TODO: @return File1, File2, Filen
301
- def self.cytoscapify(options = {})
302
- opt = {
303
- :ontologies => {},
304
- :properties => []
305
- }.merge!(options)
306
-
307
- return false if opt[:properties].empty?
308
- return false if opt[:ontologies].empty?
309
-
310
- nodes = File.new("nodes.tab", "w+")
311
- edges = File.new("edges.eda", "w+")
312
-
313
- opt[:ontologies].keys.each do |k|
314
-
315
- obo_file = parse_obo_file(opt[:ontologies][k])
316
-
317
- obo_file.terms.each do |t|
318
- nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
319
-
320
- t.relationships.each do |rel, id|
321
- edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
322
- end
323
- end
324
- end
325
-
326
- nodes.close
327
- edges.close
328
-
329
- true
330
-
331
- end
332
-
462
+ # Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
463
+ #
464
+ #== Example use
465
+ # OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
466
+ #
467
+ # @param [Symbol] ontologies a Hash of #read files as values, keys as working names
468
+ # @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
469
+ # TODO: @return File1, File2, Filen
470
+ def self.cytoscapify(options = {})
471
+ opt = {
472
+ :ontologies => {},
473
+ :properties => []
474
+ }.merge!(options)
333
475
 
334
- # Takes a two column input file, references it to two ontologies, and returns a report
335
- # that identifies data pairs that have parents who are also a data pair given a
336
- # provided property/relation type.
337
- #
338
- #== Example use
339
- # file = File.read('HAO_TGMA_list.txt')
340
- # col1_obo = File.read('hao.obo')
341
- # col2_obo = File.read('tgma.obo')
342
- #
343
- # foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
344
- #
345
- # puts "-- NO (#{foo[:no].size})\n"
346
- # puts foo[:no].join("\n")
347
- # puts "-- YES (#{foo[:yes].size})\n"
348
- # puts foo[:yes].join("\n")
349
- #
350
- # @param [Hash] options options.
351
- # @param [Symbol] data the two column data file.
352
- # @param [Symbol] colo1_obo the OBO file referenced in the first column
353
- # @param [Symbol] colo2_obo the OBO file referenced in the second column
354
- # @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
355
- # @return [Hash] a hash of {:yes => {}, :no => {}}
356
- def self.parents(options = {})
357
- opt = {
358
- :data => nil,
359
- :col1_obo => nil,
360
- :col2_obo => nil,
361
- :property => nil
362
- }.merge!(options)
363
-
364
- return false if opt[:property].nil?
365
- c1obo = parse_obo_file(opt[:col1_obo])
366
- c2obo = parse_obo_file(opt[:col2_obo])
367
-
368
- result = {:yes => [], :no => [], :unplaced => []}
369
- # update
370
- hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
371
-
372
- obo1_hash = c1obo.id_index
373
- obo2_hash = c2obo.id_index
374
-
375
- hash.keys.each do |k|
376
- a = k
377
- b = hash[a]
378
-
379
- ids_1 = []
380
- ids_2 = []
381
-
382
- if !obo1_hash[a]
383
- puts "can't find #{k}\n"
384
- next
385
- end
476
+ return false if opt[:properties].empty?
477
+ return false if opt[:ontologies].empty?
386
478
 
387
- if !obo2_hash[b]
388
- puts "can't find #{k}\n"
389
- next
390
- end
479
+ nodes = File.new("nodes.tab", "w+")
480
+ edges = File.new("edges.eda", "w+")
391
481
 
392
- obo1_hash[a].relationships.each do |rel, id|
393
- if rel == opt[:property]
394
- ids_1.push id
395
- end
396
- end
482
+ opt[:ontologies].keys.each do |k|
397
483
 
398
- obo2_hash[b].relationships.each do |rel, id|
399
- if rel == opt[:property]
400
- ids_2.push id
401
- end
402
- end
484
+ obo_file = parse_obo_file(opt[:ontologies][k])
403
485
 
404
- unplaced = true
486
+ obo_file.terms.each do |t|
487
+ nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
405
488
 
406
- ids_1.each do |c|
407
- ids_2.each do |d|
408
- t = "#{a} -> #{b}"
409
- if hash[c] == d
410
- result[:yes].push(t)
411
- unplaced = false
412
- next # don't add again after we find a hit
413
- else
414
- result[:no].push(t)
415
- unplaced = false
489
+ t.relationships.each do |rel, id|
490
+ edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
416
491
  end
417
492
  end
418
493
  end
419
- result[:unplaced]
420
494
 
421
- end
422
-
423
- result
424
- end
495
+ nodes.close
496
+ edges.close
425
497
 
498
+ true
426
499
 
500
+ end
427
501
 
428
502
 
429
- #== Helper methods that don't require the obo_parser library
503
+ #== Helper methods that don't require the obo_parser library
430
504
 
431
505
  # Given a Term id and a String representing an OBO file returns that stanza.
432
506
  #
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{obo_parser}
8
- s.version = "0.3.6"
8
+ s.version = "0.3.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["mjy"]
12
- s.date = %q{2011-08-10}
12
+ s.date = %q{2011-08-25}
13
13
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
14
14
  s.email = %q{diapriid@gmail.com}
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obo_parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 6
10
- version: 0.3.6
9
+ - 7
10
+ version: 0.3.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - mjy
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-08-10 00:00:00 Z
18
+ date: 2011-08-25 00:00:00 Z
19
19
  dependencies: []
20
20
 
21
21
  description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "