obo_parser 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,8 +14,10 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
14
14
  require 'obo_parser'
15
15
  foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance
16
16
  first_term = foo.terms.first # => An OboParser#Term instance
17
- d = first_term.def # => An OboParser#Tag instance
17
+
18
+ first_term.id.value # => 'HAO:1234'
18
19
 
20
+ d = first_term.def # => An OboParser#Tag instance
19
21
  d.tag # => 'def'
20
22
  d.value # => 'Some definition'
21
23
  d.xrefs # => ['xref:123', 'xref:456']
@@ -24,7 +26,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
24
26
  t = first_term.name # => An OboParser#Tag instance
25
27
  t.tag # => 'name'
26
28
  t.value # => 'Some Term name'
27
-
29
+
28
30
  o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
29
31
  o.first # => An OboParser#Tag instance
30
32
 
@@ -36,7 +38,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
36
38
  foo.terms.first.tags_named('synonym').first.tag # => 'synonym'
37
39
  foo.terms.first.tags_named('synonym').first.value # => 'Some label'
38
40
 
39
- foo.terms.first.relationships # => [['relation_ship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
41
+ foo.terms.first.relationships # => [['relationship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
40
42
 
41
43
  === Convenience methods
42
44
 
@@ -47,7 +49,7 @@ See also /test/test_obo_parser.rb
47
49
 
48
50
  == Utilities
49
51
 
50
- A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
52
+ A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example: 1) shared labels across sets of ontologies can be found and returned; 2) ontologies can be dumped into a simple Cytoscape node/edge format; 3) given a set of correspondences between two ontologies, various reports can be made.
51
53
 
52
54
  == Documentation
53
55
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.5
1
+ 0.3.6
@@ -26,14 +26,25 @@ module OboParser
26
26
  end
27
27
 
28
28
  # Warning! This assumes terms are unique; they are NOT required to be so in an OBO file.
29
+ # Ignores hash collisions!!
29
30
  def term_hash # :yields: Hash (String => String) (name => id)
30
31
  @terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
31
32
  end
32
33
 
33
- def id_hash # :yields: Hash (String => String (id => name))
34
+ # Returns a hash of 'id:012345' => 'term label'
35
+ #
36
+ # @return [Hash] a hash of {id => string} for the file
37
+ def id_hash
34
38
  @terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
35
39
  end
36
40
 
41
+ # Returns a hash of 'id:012345' => Term
42
+ #
43
+ # @return [Hash] a hash of {id => Term} for the file
44
+ def id_index
45
+ @terms.inject({}) {|sum, t| sum.update(t.id.value => t)}
46
+ end
47
+
37
48
  # A single line in a Stanza within an OBO file
38
49
  class Tag
39
50
  attr_accessor :tag, :value, :xrefs, :comment, :qualifier, :related_term, :relation
@@ -108,6 +119,10 @@ module OboParser
108
119
  end
109
120
  @other_tags = anonymous_tags
110
121
  end
122
+
123
+ #def relationships_of_type(reltype = nil)
124
+ # return [] if reltype.nil?
125
+ #end
111
126
  end
112
127
 
113
128
  class Typedef < Stanza
@@ -160,9 +175,6 @@ def parse_obo_file(input)
160
175
  @input = input
161
176
  raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
162
177
 
163
- # Comments are handled now.
164
- # @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
165
-
166
178
  builder = OboParser::OboParserBuilder.new
167
179
  lexer = OboParser::Lexer.new(@input)
168
180
  OboParser::Parser.new(lexer, builder).parse_file
@@ -81,7 +81,7 @@ module OboParser::Utilities
81
81
  end
82
82
  end
83
83
 
84
- puts match.sort.join("\n")
84
+ puts match.sort.join("\n")
85
85
  puts "\n#{match.length} total."
86
86
 
87
87
  end
@@ -112,7 +112,6 @@ is_anti_symmetric: true
112
112
  # file = File.read('HAO_TGMA_list.txt')
113
113
  # col1_obo = File.read('hao.obo')
114
114
  # col2_obo = File.read('tgma.obo')
115
- # column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
116
115
  #
117
116
  # OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
118
117
  #== Output types
@@ -129,8 +128,9 @@ is_anti_symmetric: true
129
128
  :data => nil,
130
129
  :col1_obo => nil,
131
130
  :col2_obo => nil,
132
- :translate_to => :id, # also :label
133
- :output => :cols, # also :xls, :homolonto
131
+ :translate_to => :id, # also :label
132
+ :output => :cols, # also :xls, :homolonto, :parent_match
133
+ :parent_match_to => :is_a, # only used when :output == :parent_match
134
134
  :output_filename => 'foo',
135
135
  :index_start => 0
136
136
  }.merge!(options)
@@ -212,6 +212,65 @@ is_anti_symmetric: true
212
212
  true
213
213
  end
214
214
 
215
+ # Takes a two column input file, references it to two ontologies, and returns a hash
216
+ #
217
+ #== Example use
218
+ # file = File.read('HAO_TGMA_list.txt')
219
+ # col1_obo = File.read('hao.obo')
220
+ # col2_obo = File.read('tgma.obo')
221
+ #
222
+ # OboParser::Utilities.hashify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
223
+ #
224
+ # @param [Hash] options options.
225
+ # @param [Symbol] data the two column data file.
226
+ # @param [Symbol] col1_obo the OBO file referenced in the first column
227
+ # @param [Symbol] col2_obo the OBO file referenced in the second column
228
+ # @return [Hash] a hash of {id string => id string}
229
+ def self.hashify_pairs(options = {})
230
+ opt = {
231
+ :data => nil,
232
+ :col1_obo => nil,
233
+ :col2_obo => nil,
234
+ }.merge!(options)
235
+
236
+ c1obo = parse_obo_file(opt[:col1_obo])
237
+ c2obo = parse_obo_file(opt[:col2_obo])
238
+
239
+ hash = Hash.new
240
+
241
+ i = opt[:index_start]
242
+ v1 = nil # a label like 'head'
243
+ v2 = nil
244
+ c1 = nil # an id 'FOO:123'
245
+ c2 = nil
246
+
247
+ opt[:data].split(/\n/).each do |row|
248
+ i += 1
249
+ c1, c2 = row.split(/\t/).map(&:strip)
250
+
251
+ if c1.nil? || c2.nil?
252
+ next
253
+ end
254
+
255
+ # the conversion
256
+ if c1 =~ /.*\:.*/ # it's an id, leave it
257
+ v1 = c1
258
+ else
259
+ v1 = c1obo.term_hash[c1]
260
+ end
261
+ if c2 =~ /.*\:.*/
262
+ v2 = c2
263
+ else
264
+ v2 = c2obo.term_hash[c2]
265
+ end
266
+
267
+ hash.merge!(c1 => c2)
268
+
269
+ end
270
+ return hash
271
+ end
272
+
273
+
215
274
  # Returns a HomolOnto Stanza
216
275
  #
217
276
  # @param [String] id an externally tracked id for the id: tag like '00001'
@@ -230,6 +289,143 @@ is_anti_symmetric: true
230
289
  s.join("\n")
231
290
  end
232
291
 
292
+
293
+ # Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
294
+ #
295
+ #== Example use
296
+ # OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
297
+ #
298
+ # @param [Symbol] ontologies a Hash of #read files as values, keys as working names
299
+ # @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
300
+ # TODO: @return File1, File2, Filen
301
+ def self.cytoscapify(options = {})
302
+ opt = {
303
+ :ontologies => {},
304
+ :properties => []
305
+ }.merge!(options)
306
+
307
+ return false if opt[:properties].empty?
308
+ return false if opt[:ontologies].empty?
309
+
310
+ nodes = File.new("nodes.tab", "w+")
311
+ edges = File.new("edges.eda", "w+")
312
+
313
+ opt[:ontologies].keys.each do |k|
314
+
315
+ obo_file = parse_obo_file(opt[:ontologies][k])
316
+
317
+ obo_file.terms.each do |t|
318
+ nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
319
+
320
+ t.relationships.each do |rel, id|
321
+ edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
322
+ end
323
+ end
324
+ end
325
+
326
+ nodes.close
327
+ edges.close
328
+
329
+ true
330
+
331
+ end
332
+
333
+
334
+ # Takes a two column input file, references it to two ontologies, and returns a report
335
+ # that identifies data pairs that have parents who are also a data pair given a
336
+ # provided property/relation type.
337
+ #
338
+ #== Example use
339
+ # file = File.read('HAO_TGMA_list.txt')
340
+ # col1_obo = File.read('hao.obo')
341
+ # col2_obo = File.read('tgma.obo')
342
+ #
343
+ # foo = OboParser::Utilities.parents(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
344
+ #
345
+ # puts "-- NO (#{foo[:no].size})\n"
346
+ # puts foo[:no].join("\n")
347
+ # puts "-- YES (#{foo[:yes].size})\n"
348
+ # puts foo[:yes].join("\n")
349
+ #
350
+ # @param [Hash] options options.
351
+ # @param [Symbol] data the two column data file.
352
+ # @param [Symbol] col1_obo the OBO file referenced in the first column
353
+ # @param [Symbol] col2_obo the OBO file referenced in the second column
354
+ # @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
355
+ # @return [Hash, false] a hash of {:yes => [], :no => [], :unplaced => []}, or false when :property is nil
356
+ def self.parents(options = {})
357
+ opt = {
358
+ :data => nil,
359
+ :col1_obo => nil,
360
+ :col2_obo => nil,
361
+ :property => nil
362
+ }.merge!(options)
363
+
364
+ return false if opt[:property].nil?
365
+ c1obo = parse_obo_file(opt[:col1_obo])
366
+ c2obo = parse_obo_file(opt[:col2_obo])
367
+
368
+ result = {:yes => [], :no => [], :unplaced => []}
369
+ # update
370
+ hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
371
+
372
+ obo1_hash = c1obo.id_index
373
+ obo2_hash = c2obo.id_index
374
+
375
+ hash.keys.each do |k|
376
+ a = k
377
+ b = hash[a]
378
+
379
+ ids_1 = []
380
+ ids_2 = []
381
+
382
+ if !obo1_hash[a]
383
+ puts "can't find #{k}\n"
384
+ next
385
+ end
386
+
387
+ if !obo2_hash[b]
388
+ puts "can't find #{k}\n"
389
+ next
390
+ end
391
+
392
+ obo1_hash[a].relationships.each do |rel, id|
393
+ if rel == opt[:property]
394
+ ids_1.push id
395
+ end
396
+ end
397
+
398
+ obo2_hash[b].relationships.each do |rel, id|
399
+ if rel == opt[:property]
400
+ ids_2.push id
401
+ end
402
+ end
403
+
404
+ unplaced = true
405
+
406
+ ids_1.each do |c|
407
+ ids_2.each do |d|
408
+ t = "#{a} -> #{b}"
409
+ if hash[c] == d
410
+ result[:yes].push(t)
411
+ unplaced = false
412
+ next # don't add again after we find a hit
413
+ else
414
+ result[:no].push(t)
415
+ unplaced = false
416
+ end
417
+ end
418
+ end
419
+ result[:unplaced]
420
+
421
+ end
422
+
423
+ result
424
+ end
425
+
426
+
427
+
428
+
233
429
  #== Helper methods that don't require the obo_parser library
234
430
 
235
431
  # Given a Term id and a String representing an OBO file returns that stanza.
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{obo_parser}
8
- s.version = "0.3.5"
8
+ s.version = "0.3.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["mjy"]
12
- s.date = %q{2011-06-09}
12
+ s.date = %q{2011-08-10}
13
13
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
14
14
  s.email = %q{diapriid@gmail.com}
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obo_parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 5
10
- version: 0.3.5
9
+ - 6
10
+ version: 0.3.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - mjy
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-06-09 00:00:00 Z
18
+ date: 2011-08-10 00:00:00 Z
19
19
  dependencies: []
20
20
 
21
21
  description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "