obo_parser 0.3.5 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,8 +14,10 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
14
14
  require 'obo_parser'
15
15
  foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance
16
16
  first_term = foo.terms.first # => An OboParser#Term instance
17
- d = first_term.def # => An OboParser#Tag instance
17
+
18
+ first_term.id.value # => 'HAO:1234'
18
19
 
20
+ d = first_term.def # => An OboParser#Tag instance
19
21
  d.tag # => 'def'
20
22
  d.value # => 'Some definition'
21
23
  d.xrefs # => ['xref:123', 'xref:456']
@@ -24,7 +26,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
24
26
  t = first_term.name # => An OboParser#Tag instance
25
27
  t.tag # => 'name'
26
28
  t.value # => 'Some Term name'
27
-
29
+
28
30
  o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
29
31
  o.first # => An OboParser#Tag instance
30
32
 
@@ -36,7 +38,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
36
38
  foo.terms.first.tags_named('synonym').first.tag # => 'synonym'
37
39
  foo.terms.first.tags_named('synonym').first.value # => 'Some label'
38
40
 
39
- foo.terms.first.relationships # => [['relation_ship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
41
+ foo.terms.first.relationships # => [['relationship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
40
42
 
41
43
  === Convenience methods
42
44
 
@@ -47,7 +49,7 @@ See also /test/test_obo_parser.rb
47
49
 
48
50
  == Utilities
49
51
 
50
- A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
52
+ A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example: 1) shared labels across sets of ontologies can be found and returned; 2) ontologies can be dumped into a simple Cytoscape node/edge format; 3) given a set of correspondences between two ontologies, various reports can be made.
51
53
 
52
54
  == Documentation
53
55
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.5
1
+ 0.3.6
@@ -26,14 +26,25 @@ module OboParser
26
26
  end
27
27
 
28
28
  # Warning! This assumes term names are unique; they are NOT required to be so in an OBO file.
29
+ # Ignores hash collisions!! (when a name repeats, the last term's id wins)
29
30
  def term_hash # :yields: Hash (String => String) (name => id)
30
31
  @terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
31
32
  end
32
33
 
33
- def id_hash # :yields: Hash (String => String (id => name))
34
+ # Returns a hash of 'id:012345' => 'term label'
35
+ #
36
+ # @return [Hash] a hash of {id => string} for the file
37
+ def id_hash
34
38
  @terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
35
39
  end
36
40
 
41
+ # Returns a hash of 'id:012345' => Term
42
+ #
43
+ # @return [Hash] a hash of {id => Term} for the file
44
+ def id_index
45
+ @terms.inject({}) {|sum, t| sum.update(t.id.value => t)}
46
+ end
47
+
37
48
  # A single line in a Stanza within an OBO file
38
49
  class Tag
39
50
  attr_accessor :tag, :value, :xrefs, :comment, :qualifier, :related_term, :relation
@@ -108,6 +119,10 @@ module OboParser
108
119
  end
109
120
  @other_tags = anonymous_tags
110
121
  end
122
+
123
+ #def relationships_of_type(reltype = nil)
124
+ # return [] if reltype.nil?
125
+ #end
111
126
  end
112
127
 
113
128
  class Typedef < Stanza
@@ -160,9 +175,6 @@ def parse_obo_file(input)
160
175
  @input = input
161
176
  raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
162
177
 
163
- # Comments are handled now.
164
- # @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
165
-
166
178
  builder = OboParser::OboParserBuilder.new
167
179
  lexer = OboParser::Lexer.new(@input)
168
180
  OboParser::Parser.new(lexer, builder).parse_file
@@ -81,7 +81,7 @@ module OboParser::Utilities
81
81
  end
82
82
  end
83
83
 
84
- puts match.sort.join("\n")
84
+ puts match.sort.join("\n")
85
85
  puts "\n#{match.length} total."
86
86
 
87
87
  end
@@ -112,7 +112,6 @@ is_anti_symmetric: true
112
112
  # file = File.read('HAO_TGMA_list.txt')
113
113
  # col1_obo = File.read('hao.obo')
114
114
  # col2_obo = File.read('tgma.obo')
115
- # column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
116
115
  #
117
116
  # OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
118
117
  #== Output types
@@ -129,8 +128,9 @@ is_anti_symmetric: true
129
128
  :data => nil,
130
129
  :col1_obo => nil,
131
130
  :col2_obo => nil,
132
- :translate_to => :id, # also :label
133
- :output => :cols, # also :xls, :homolonto
131
+ :translate_to => :id, # also :label
132
+ :output => :cols, # also :xls, :homolonto, :parent_match
133
+ :parent_match_to => :is_a, # only used when :output == :parent_match
134
134
  :output_filename => 'foo',
135
135
  :index_start => 0
136
136
  }.merge!(options)
@@ -212,6 +212,65 @@ is_anti_symmetric: true
212
212
  true
213
213
  end
214
214
 
215
+ # Takes a two column input file, references it to two ontologies, and returns a hash
216
+ #
217
+ #== Example use
218
+ # file = File.read('HAO_TGMA_list.txt')
219
+ # col1_obo = File.read('hao.obo')
220
+ # col2_obo = File.read('tgma.obo')
221
+ #
222
+ # OboParser::Utilities.hashify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
223
+ #
224
+ # @param [Hash] options options.
225
+ # @param [Symbol] data the two column data file.
226
+ # @param [Symbol] col1_obo the OBO file referenced in the first column
227
+ # @param [Symbol] col2_obo the OBO file referenced in the second column
228
+ # @return [Hash] a hash of {id string => id string}
229
+ def self.hashify_pairs(options = {})
230
+ opt = {
231
+ :data => nil,
232
+ :col1_obo => nil,
233
+ :col2_obo => nil,
234
+ }.merge!(options)
235
+
236
+ c1obo = parse_obo_file(opt[:col1_obo])
237
+ c2obo = parse_obo_file(opt[:col2_obo])
238
+
239
+ hash = Hash.new
240
+
241
+ i = opt[:index_start]
242
+ v1 = nil # a label like 'head'
243
+ v2 = nil
244
+ c1 = nil # an id 'FOO:123'
245
+ c2 = nil
246
+
247
+ opt[:data].split(/\n/).each do |row|
248
+ i += 1
249
+ c1, c2 = row.split(/\t/).map(&:strip)
250
+
251
+ if c1.nil? || c2.nil?
252
+ next
253
+ end
254
+
255
+ # the conversion
256
+ if c1 =~ /.*\:.*/ # it's an id, leave it
257
+ v1 = c1
258
+ else
259
+ v1 = c1obo.term_hash[c1]
260
+ end
261
+ if c2 =~ /.*\:.*/
262
+ v2 = c2
263
+ else
264
+ v2 = c2obo.term_hash[c2]
265
+ end
266
+
267
+ hash.merge!(c1 => c2)
268
+
269
+ end
270
+ return hash
271
+ end
272
+
273
+
215
274
  # Returns a HomolOnto Stanza
216
275
  #
217
276
  # @param [String] id an externally tracked id for the id: tag like '00001'
@@ -230,6 +289,143 @@ is_anti_symmetric: true
230
289
  s.join("\n")
231
290
  end
232
291
 
292
+
293
+ # Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
294
+ #
295
+ #== Example use
296
+ # OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
297
+ #
298
+ # @param [Symbol] ontologies a Hash of #read files as values, keys as working names
299
+ # @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
300
+ # TODO: @return File1, File2, Filen
301
+ def self.cytoscapify(options = {})
302
+ opt = {
303
+ :ontologies => {},
304
+ :properties => []
305
+ }.merge!(options)
306
+
307
+ return false if opt[:properties].empty?
308
+ return false if opt[:ontologies].empty?
309
+
310
+ nodes = File.new("nodes.tab", "w+")
311
+ edges = File.new("edges.eda", "w+")
312
+
313
+ opt[:ontologies].keys.each do |k|
314
+
315
+ obo_file = parse_obo_file(opt[:ontologies][k])
316
+
317
+ obo_file.terms.each do |t|
318
+ nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
319
+
320
+ t.relationships.each do |rel, id|
321
+ edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
322
+ end
323
+ end
324
+ end
325
+
326
+ nodes.close
327
+ edges.close
328
+
329
+ true
330
+
331
+ end
332
+
333
+
334
+ # Takes a two column input file, references it to two ontologies, and returns a report
335
+ # that identifies data pairs that have parents who are also a data pair given a
336
+ # provided property/relation type.
337
+ #
338
+ #== Example use
339
+ # file = File.read('HAO_TGMA_list.txt')
340
+ # col1_obo = File.read('hao.obo')
341
+ # col2_obo = File.read('tgma.obo')
342
+ #
343
+ # foo = OboParser::Utilities.parents(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
344
+ #
345
+ # puts "-- NO (#{foo[:no].size})\n"
346
+ # puts foo[:no].join("\n")
347
+ # puts "-- YES (#{foo[:yes].size})\n"
348
+ # puts foo[:yes].join("\n")
349
+ #
350
+ # @param [Hash] options options.
351
+ # @param [Symbol] data the two column data file.
352
+ # @param [Symbol] colo1_obo the OBO file referenced in the first column
353
+ # @param [Symbol] colo2_obo the OBO file referenced in the second column
354
+ # @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
355
+ # @return [Hash] a hash of {:yes => [], :no => [], :unplaced => []} (false when :property is nil)
356
+ def self.parents(options = {})
357
+ opt = {
358
+ :data => nil,
359
+ :col1_obo => nil,
360
+ :col2_obo => nil,
361
+ :property => nil
362
+ }.merge!(options)
363
+
364
+ return false if opt[:property].nil?
365
+ c1obo = parse_obo_file(opt[:col1_obo])
366
+ c2obo = parse_obo_file(opt[:col2_obo])
367
+
368
+ result = {:yes => [], :no => [], :unplaced => []}
369
+ # update
370
+ hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
371
+
372
+ obo1_hash = c1obo.id_index
373
+ obo2_hash = c2obo.id_index
374
+
375
+ hash.keys.each do |k|
376
+ a = k
377
+ b = hash[a]
378
+
379
+ ids_1 = []
380
+ ids_2 = []
381
+
382
+ if !obo1_hash[a]
383
+ puts "can't find #{k}\n"
384
+ next
385
+ end
386
+
387
+ if !obo2_hash[b]
388
+ puts "can't find #{k}\n"
389
+ next
390
+ end
391
+
392
+ obo1_hash[a].relationships.each do |rel, id|
393
+ if rel == opt[:property]
394
+ ids_1.push id
395
+ end
396
+ end
397
+
398
+ obo2_hash[b].relationships.each do |rel, id|
399
+ if rel == opt[:property]
400
+ ids_2.push id
401
+ end
402
+ end
403
+
404
+ unplaced = true
405
+
406
+ ids_1.each do |c|
407
+ ids_2.each do |d|
408
+ t = "#{a} -> #{b}"
409
+ if hash[c] == d
410
+ result[:yes].push(t)
411
+ unplaced = false
412
+ next # don't add again after we find a hit
413
+ else
414
+ result[:no].push(t)
415
+ unplaced = false
416
+ end
417
+ end
418
+ end
419
+ result[:unplaced]
420
+
421
+ end
422
+
423
+ result
424
+ end
425
+
426
+
427
+
428
+
233
429
  #== Helper methods that don't require the obo_parser library
234
430
 
235
431
  # Given a Term id and a String representing an OBO file returns that stanza.
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{obo_parser}
8
- s.version = "0.3.5"
8
+ s.version = "0.3.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["mjy"]
12
- s.date = %q{2011-06-09}
12
+ s.date = %q{2011-08-10}
13
13
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
14
14
  s.email = %q{diapriid@gmail.com}
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obo_parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 5
10
- version: 0.3.5
9
+ - 6
10
+ version: 0.3.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - mjy
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-06-09 00:00:00 Z
18
+ date: 2011-08-10 00:00:00 Z
19
19
  dependencies: []
20
20
 
21
21
  description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "