obo_parser 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/VERSION +1 -1
  2. data/lib/utilities.rb +212 -138
  3. data/obo_parser.gemspec +2 -2
  4. metadata +4 -4
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.6
1
+ 0.3.7
@@ -18,7 +18,7 @@ module OboParser::Utilities
18
18
  #
19
19
  # @param [Integer] cutoff only Term ids with > cutoff labels will be reported
20
20
  # @param [Array] files an Array of read files
21
- # @return [String] the transation in tab delimted format
21
+ # @return [String] the translation in tab delimited format
22
22
  def self.dump_comparison_by_id(cutoff = 0, files = [])
23
23
  return '' if files.size < 1
24
24
 
@@ -47,7 +47,9 @@ module OboParser::Utilities
47
47
  end
48
48
  end
49
49
 
50
+
50
51
  # Returns all labels found in all passed ontologies. Does not yet include synonyms.
52
+ # Caution: strips adult, embryonic, larval from labels (comment to remove)
51
53
  #
52
54
  #== Example use
53
55
  # of1 = File.read('fly_anatomy.obo')
@@ -64,8 +66,11 @@ module OboParser::Utilities
64
66
  files.each do |f|
65
67
  o = parse_obo_file(f)
66
68
  o.term_hash.keys.each do |k|
69
+
70
+ # TODO: make this optional
67
71
  tmp = k.gsub(/adult/, "").strip
68
72
  tmp = k.gsub(/embryonic\/larval/, "").strip
73
+
69
74
  if comparison[tmp]
70
75
  comparison[tmp] += 1
71
76
  else
@@ -86,10 +91,9 @@ module OboParser::Utilities
86
91
 
87
92
  end
88
93
 
94
+ #== Two column correspondences and translation tools
89
95
 
90
- #== Two column translation tools
91
-
92
- HOMOLONTO_HEADER = %{
96
+ HOMOLONTO_HEADER = %{
93
97
  format-version: 1.2
94
98
  auto-generated-by: obo_parser
95
99
  default-namespace: fix_me
@@ -212,31 +216,129 @@ is_anti_symmetric: true
212
216
  true
213
217
  end
214
218
 
215
- # Takes a two column input file, references it to two ontologies, and returns a hash
219
+
220
+ # Takes a two column input file, references it to two ontologies, and returns a report
221
+ # that identifies data pairs that have parents who are also a data pair given a
222
+ # provided property/relation type.
216
223
  #
217
224
  #== Example use
218
225
  # file = File.read('HAO_TGMA_list.txt')
219
226
  # col1_obo = File.read('hao.obo')
220
227
  # col2_obo = File.read('tgma.obo')
228
+ #
229
+ # foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
230
+ #
231
+ # puts "-- NO (#{foo[:no].size})\n"
232
+ # puts foo[:no].join("\n")
233
+ # puts "-- YES (#{foo[:yes].size})\n"
234
+ # puts foo[:yes].join("\n")
235
+ #
236
+ # @param [Hash] options options.
237
+ # @param [Symbol] data the two column data file.
238
+ # @param [Symbol] col1_obo the OBO file referenced in the first column
239
+ # @param [Symbol] col2_obo the OBO file referenced in the second column
240
+ # @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
241
+ # @return [Hash, false] a hash of {:yes => [], :no => [], :unplaced => []}, or false when :property is nil
242
+ def parents(options = {})
243
+ opt = {
244
+ :data => nil,
245
+ :col1_obo => nil,
246
+ :col2_obo => nil,
247
+ :property => nil
248
+ }.merge!(options)
249
+
250
+ return false if opt[:property].nil?
251
+ c1obo = parse_obo_file(opt[:col1_obo])
252
+ c2obo = parse_obo_file(opt[:col2_obo])
253
+
254
+ result = {:yes => [], :no => [], :unplaced => []}
255
+ # update
256
+ array = OboParser::Utilities.arrayify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
257
+
258
+ # index the pairs
259
+ index = array.inject({}){|hsh, i| hsh.update({"#{i[0]}#{i[1]}" => i})}
260
+
261
+ obo1_hash = c1obo.id_index
262
+ obo2_hash = c2obo.id_index
263
+
264
+ array.each do |k|
265
+ a = k[0]
266
+ b = k[1]
267
+
268
+ ids_1 = []
269
+ ids_2 = []
270
+
271
+ if !obo1_hash[a]
272
+ puts "can't find #{k}\n"
273
+ next
274
+ end
275
+
276
+ if !obo2_hash[b]
277
+ puts "can't find #{k}\n"
278
+ next
279
+ end
280
+
281
+ obo1_hash[a].relationships.each do |rel, id|
282
+ if rel == opt[:property]
283
+ ids_1.push id
284
+ end
285
+ end
286
+
287
+ obo2_hash[b].relationships.each do |rel, id|
288
+ if rel == opt[:property]
289
+ ids_2.push id
290
+ end
291
+ end
292
+
293
+ unplaced = true
294
+
295
+ ids_1.each do |c|
296
+ ids_2.each do |d|
297
+ t = "#{a} -> #{b}"
298
+ if index["#{c}#{d}"] # think this is right hash[c] == d
299
+ result[:yes].push(t)
300
+ unplaced = false
301
+ next # don't add again after we find a hit
302
+ else
303
+ result[:no].push(t)
304
+ unplaced = false
305
+ end
306
+ end
307
+ end
308
+ result[:unplaced]
309
+
310
+ end
311
+
312
+ result
313
+ end
314
+
315
+
316
+ # Takes a two column input file, references it to two ontologies, and returns an array of [[id1, id2], [id3, idn] ...].
317
+ #
318
+ #== Example use
319
+ # file = File.read('HAO_TGMA_list.txt')
320
+ # col1_obo = File.read('hao.obo')
321
+ # col2_obo = File.read('tgma.obo')
221
322
  #
222
- # OboParser::Utilities.hashify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
323
+ # OboParser::Utilities.arrayify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
223
324
  #
224
325
  # @param [Hash] options options.
225
326
  # @param [Symbol] data the two column data file.
226
327
  # @param [Symbol] col1_obo the OBO file referenced in the first column
227
328
  # @param [Symbol] col2_obo the OBO file referenced in the second column
228
- # @return [Hash] a hash of {id string => id string}
229
- def self.hashify_pairs(options = {})
329
+ # @return [Array] an array of [[id string, id string], ...]
330
+ def self.arrayify_pairs(options = {})
230
331
  opt = {
231
332
  :data => nil,
232
333
  :col1_obo => nil,
233
334
  :col2_obo => nil,
335
+ :index_start => 0
234
336
  }.merge!(options)
235
-
337
+
236
338
  c1obo = parse_obo_file(opt[:col1_obo])
237
339
  c2obo = parse_obo_file(opt[:col2_obo])
238
340
 
239
- hash = Hash.new
341
+ array = Array.new
240
342
 
241
343
  i = opt[:index_start]
242
344
  v1 = nil # a label like 'head'
@@ -246,7 +348,7 @@ is_anti_symmetric: true
246
348
 
247
349
  opt[:data].split(/\n/).each do |row|
248
350
  i += 1
249
- c1, c2 = row.split(/\t/).map(&:strip)
351
+ c1, c2 = row.split(/\t/).map(&:strip)
250
352
 
251
353
  if c1.nil? || c2.nil?
252
354
  next
@@ -263,11 +365,78 @@ is_anti_symmetric: true
263
365
  else
264
366
  v2 = c2obo.term_hash[c2]
265
367
  end
266
-
267
- hash.merge!(c1 => c2)
268
-
368
+
369
+ array << [c1,c2]
370
+
371
+ end
372
+ return array
373
+ end
374
+
375
+
376
+ # Provides a set comparison of correspondences between two sets of correspondences. Finds correspondences that are shared, in left only, or in right only.
377
+ #
378
+ #== Example use
379
+ # data1 = File.read('input/hao_fbbt_vetted_list2.txt')
380
+ # data2 = File.read('input/hao_fbbt_from_bioportal.txt')
381
+ # col1_obo = File.read('hao.obo')
382
+ # col2_obo = File.read('tgma.obo')
383
+ #
384
+ # OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :text ) # => a text report
385
+ # or
386
+ # foo = OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :array )
387
+ #
388
+ #== "Report" types
389
+ # Use :return to return different values
390
+ # :array - Returns an Array
391
+ # :text - Generates 4 column report (id, label, id, label) to STDOUT
392
+ #
393
+ # @param [Hash] options options.
394
+ # @param [Symbol] data1 two columns (tab) with a correspondence between ontologies 1 and 2
395
+ # @param [Symbol] data2 as data1, a second set of correspondences
396
+ # @param [Symbol] col1_obo the OBO file corresponding to the first column of data1 and data2
397
+ # @param [Symbol] col2_obo the OBO file corresponding to the second column of data1 and data2
398
+ # @param [Symbol] return the value to return
399
+ # @return [String | Array]
400
+ def self.set_comparison(options = {})
401
+ opt = {
402
+ :data1 => nil,
403
+ :data2 => nil,
404
+ :col1_obo => nil,
405
+ :col2_obo => nil,
406
+ :return => :array # also :text
407
+ }.merge!(options)
408
+
409
+ c1obo = parse_obo_file(opt[:col1_obo])
410
+ c2obo = parse_obo_file(opt[:col2_obo])
411
+
412
+ th1 = c1obo.id_hash
413
+ th2 = c2obo.id_hash
414
+
415
+ array1 = OboParser::Utilities.arrayify_pairs(:data => opt[:data1], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
416
+ array2 = OboParser::Utilities.arrayify_pairs(:data => opt[:data2], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
417
+
418
+ index1 = array1.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
419
+ index2 = array2.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
420
+
421
+ result = {:in_left => [], :in_right => [], :in_both => []}
422
+
423
+ result[:in_both] = (index1.keys & index2.keys).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
424
+ result[:in_left] = ((index1.keys | index2.keys) - index2.keys ).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
425
+ result[:in_right] = ((index1.keys | index2.keys) - index1.keys ).collect{|k| [ index2[k][0], th1[index2[k][0]], index2[k][1], th2[index2[k][1]] , ] }
426
+
427
+ case opt[:return]
428
+ when :array
429
+ return result
430
+ when :text
431
+ [:in_both, :in_left, :in_right].each do |t|
432
+ puts "--- #{t.to_s} (#{result[t].size}) ---"
433
+ result[t].each do |r|
434
+ puts r.join("\t")
435
+ end
436
+ end
437
+ else
438
+ return nil
269
439
  end
270
- return hash
271
440
  end
272
441
 
273
442
 
@@ -290,143 +459,48 @@ is_anti_symmetric: true
290
459
  end
291
460
 
292
461
 
293
- # Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
294
- #
295
- #== Example use
296
- # OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
297
- #
298
- # @param [Symbol] ontologies a Hash of #read files as values, keys as working names
299
- # @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
300
- # TODO: @return File1, File2, Filen
301
- def self.cytoscapify(options = {})
302
- opt = {
303
- :ontologies => {},
304
- :properties => []
305
- }.merge!(options)
306
-
307
- return false if opt[:properties].empty?
308
- return false if opt[:ontologies].empty?
309
-
310
- nodes = File.new("nodes.tab", "w+")
311
- edges = File.new("edges.eda", "w+")
312
-
313
- opt[:ontologies].keys.each do |k|
314
-
315
- obo_file = parse_obo_file(opt[:ontologies][k])
316
-
317
- obo_file.terms.each do |t|
318
- nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
319
-
320
- t.relationships.each do |rel, id|
321
- edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
322
- end
323
- end
324
- end
325
-
326
- nodes.close
327
- edges.close
328
-
329
- true
330
-
331
- end
332
-
462
+ # Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
463
+ #
464
+ #== Example use
465
+ # OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
466
+ #
467
+ # @param [Symbol] ontologies a Hash of #read files as values, keys as working names
468
+ # @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
469
+ # TODO: @return File1, File2, Filen
470
+ def self.cytoscapify(options = {})
471
+ opt = {
472
+ :ontologies => {},
473
+ :properties => []
474
+ }.merge!(options)
333
475
 
334
- # Takes a two column input file, references it to two ontologies, and returns a report
335
- # that identifies data pairs that have parents who are also a data pair given a
336
- # provided property/relation type.
337
- #
338
- #== Example use
339
- # file = File.read('HAO_TGMA_list.txt')
340
- # col1_obo = File.read('hao.obo')
341
- # col2_obo = File.read('tgma.obo')
342
- #
343
- # foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
344
- #
345
- # puts "-- NO (#{foo[:no].size})\n"
346
- # puts foo[:no].join("\n")
347
- # puts "-- YES (#{foo[:yes].size})\n"
348
- # puts foo[:yes].join("\n")
349
- #
350
- # @param [Hash] options options.
351
- # @param [Symbol] data the two column data file.
352
- # @param [Symbol] colo1_obo the OBO file referenced in the first column
353
- # @param [Symbol] colo2_obo the OBO file referenced in the second column
354
- # @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
355
- # @return [Hash] a hash of {:yes => {}, :no => {}}
356
- def self.parents(options = {})
357
- opt = {
358
- :data => nil,
359
- :col1_obo => nil,
360
- :col2_obo => nil,
361
- :property => nil
362
- }.merge!(options)
363
-
364
- return false if opt[:property].nil?
365
- c1obo = parse_obo_file(opt[:col1_obo])
366
- c2obo = parse_obo_file(opt[:col2_obo])
367
-
368
- result = {:yes => [], :no => [], :unplaced => []}
369
- # update
370
- hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
371
-
372
- obo1_hash = c1obo.id_index
373
- obo2_hash = c2obo.id_index
374
-
375
- hash.keys.each do |k|
376
- a = k
377
- b = hash[a]
378
-
379
- ids_1 = []
380
- ids_2 = []
381
-
382
- if !obo1_hash[a]
383
- puts "can't find #{k}\n"
384
- next
385
- end
476
+ return false if opt[:properties].empty?
477
+ return false if opt[:ontologies].empty?
386
478
 
387
- if !obo2_hash[b]
388
- puts "can't find #{k}\n"
389
- next
390
- end
479
+ nodes = File.new("nodes.tab", "w+")
480
+ edges = File.new("edges.eda", "w+")
391
481
 
392
- obo1_hash[a].relationships.each do |rel, id|
393
- if rel == opt[:property]
394
- ids_1.push id
395
- end
396
- end
482
+ opt[:ontologies].keys.each do |k|
397
483
 
398
- obo2_hash[b].relationships.each do |rel, id|
399
- if rel == opt[:property]
400
- ids_2.push id
401
- end
402
- end
484
+ obo_file = parse_obo_file(opt[:ontologies][k])
403
485
 
404
- unplaced = true
486
+ obo_file.terms.each do |t|
487
+ nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
405
488
 
406
- ids_1.each do |c|
407
- ids_2.each do |d|
408
- t = "#{a} -> #{b}"
409
- if hash[c] == d
410
- result[:yes].push(t)
411
- unplaced = false
412
- next # don't add again after we find a hit
413
- else
414
- result[:no].push(t)
415
- unplaced = false
489
+ t.relationships.each do |rel, id|
490
+ edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
416
491
  end
417
492
  end
418
493
  end
419
- result[:unplaced]
420
494
 
421
- end
422
-
423
- result
424
- end
495
+ nodes.close
496
+ edges.close
425
497
 
498
+ true
426
499
 
500
+ end
427
501
 
428
502
 
429
- #== Helper methods that don't require the obo_parser library
503
+ #== Helper methods that don't require the obo_parser library
430
504
 
431
505
  # Given a Term id and a String representing an OBO file returns that stanza.
432
506
  #
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{obo_parser}
8
- s.version = "0.3.6"
8
+ s.version = "0.3.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["mjy"]
12
- s.date = %q{2011-08-10}
12
+ s.date = %q{2011-08-25}
13
13
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
14
14
  s.email = %q{diapriid@gmail.com}
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obo_parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 6
10
- version: 0.3.6
9
+ - 7
10
+ version: 0.3.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - mjy
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-08-10 00:00:00 Z
18
+ date: 2011-08-25 00:00:00 Z
19
19
  dependencies: []
20
20
 
21
21
  description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "