obo_parser 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +6 -4
- data/VERSION +1 -1
- data/lib/obo_parser.rb +16 -4
- data/lib/utilities.rb +200 -4
- data/obo_parser.gemspec +2 -2
- metadata +4 -4
data/README.rdoc
CHANGED
|
@@ -14,8 +14,10 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
|
|
|
14
14
|
require 'obo_parser'
|
|
15
15
|
foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance
|
|
16
16
|
first_term = foo.terms.first # => An OboParser#Term instance
|
|
17
|
-
|
|
17
|
+
|
|
18
|
+
first_term.id.value # => 'HAO:1234'
|
|
18
19
|
|
|
20
|
+
d = first_term.def # => An OboParser#Tag instance
|
|
19
21
|
d.tag # => 'def'
|
|
20
22
|
d.value # => 'Some defintition'
|
|
21
23
|
d.xrefs # => ['xref:123', 'xref:456']
|
|
@@ -24,7 +26,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
|
|
|
24
26
|
t = first_term.name # => An OboParser#Tag instance
|
|
25
27
|
t.tag # => 'name'
|
|
26
28
|
t.value # => 'Some Term name'
|
|
27
|
-
|
|
29
|
+
|
|
28
30
|
o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
|
|
29
31
|
o.first # => An OboParser#Tag instance
|
|
30
32
|
|
|
@@ -36,7 +38,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
|
|
|
36
38
|
foo.terms.first.tags_named('synonym').first.tag # => 'synonym'
|
|
37
39
|
foo.terms.first.tags_named('synonym').first.value # => 'Some label'
|
|
38
40
|
|
|
39
|
-
foo.terms.first.relationships # => [['
|
|
41
|
+
foo.terms.first.relationships # => [['relationship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
|
|
40
42
|
|
|
41
43
|
=== Convenience methods
|
|
42
44
|
|
|
@@ -47,7 +49,7 @@ See also /test/test_obo_parser.rb
|
|
|
47
49
|
|
|
48
50
|
== Utilities
|
|
49
51
|
|
|
50
|
-
A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example
|
|
52
|
+
A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example: 1) shared labels across sets of ontologies can be found and returned; 2) ontologies can be dumped into a simple Cytoscape node/edge format; 3) given a set of correspondences between two ontologies, various reports can be made.
|
|
51
53
|
|
|
52
54
|
== Documentation
|
|
53
55
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.3.
|
|
1
|
+
0.3.6
|
data/lib/obo_parser.rb
CHANGED
|
@@ -26,14 +26,25 @@ module OboParser
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
# Builds a lookup from term name to term id.
#
# Warning! This assumes term names are unique, they are NOT required to be so
# in an OBO file. Collisions are ignored: when several terms share a name the
# id of the last such term in @terms is the one kept.
#
# @return [Hash] a hash of {name String => id String}
def term_hash
  name_id_pairs = @terms.map { |term| [term.name.value, term.id.value] }
  Hash[name_id_pairs]
end
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
# Builds a lookup from term id to term label, like 'id:012345' => 'term label'.
# When an id occurs more than once the label of the last such term in @terms wins.
#
# @return [Hash] a hash of {id => string} for the file
def id_hash
  id_label_pairs = @terms.map { |term| [term.id.value, term.name.value] }
  Hash[id_label_pairs]
end
|
|
36
40
|
|
|
41
|
+
# Indexes the terms by id, like 'id:012345' => Term.
# When an id occurs more than once the last such term in @terms wins.
#
# @return [Hash] a hash of {id => Term} for the file
def id_index
  id_term_pairs = @terms.map { |term| [term.id.value, term] }
  Hash[id_term_pairs]
end
|
|
47
|
+
|
|
37
48
|
# A single line in a Stanza within an OBO file
|
|
38
49
|
class Tag
|
|
39
50
|
attr_accessor :tag, :value, :xrefs, :comment, :qualifier, :related_term, :relation
|
|
@@ -108,6 +119,10 @@ module OboParser
|
|
|
108
119
|
end
|
|
109
120
|
@other_tags = anonymous_tags
|
|
110
121
|
end
|
|
122
|
+
|
|
123
|
+
#def relationships_of_type(reltype = nil)
|
|
124
|
+
# return [] if reltype.nil?
|
|
125
|
+
#end
|
|
111
126
|
end
|
|
112
127
|
|
|
113
128
|
class Typedef < Stanza
|
|
@@ -160,9 +175,6 @@ def parse_obo_file(input)
|
|
|
160
175
|
@input = input
|
|
161
176
|
raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
|
|
162
177
|
|
|
163
|
-
# Comments are handled now.
|
|
164
|
-
# @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
|
165
|
-
|
|
166
178
|
builder = OboParser::OboParserBuilder.new
|
|
167
179
|
lexer = OboParser::Lexer.new(@input)
|
|
168
180
|
OboParser::Parser.new(lexer, builder).parse_file
|
data/lib/utilities.rb
CHANGED
|
@@ -81,7 +81,7 @@ module OboParser::Utilities
|
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
83
|
|
|
84
|
-
puts
|
|
84
|
+
puts match.sort.join("\n")
|
|
85
85
|
puts "\n#{match.length} total."
|
|
86
86
|
|
|
87
87
|
end
|
|
@@ -112,7 +112,6 @@ is_anti_symmetric: true
|
|
|
112
112
|
# file = File.read('HAO_TGMA_list.txt')
|
|
113
113
|
# col1_obo = File.read('hao.obo')
|
|
114
114
|
# col2_obo = File.read('tgma.obo')
|
|
115
|
-
# column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
|
|
116
115
|
#
|
|
117
116
|
# OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
|
|
118
117
|
#== Output types
|
|
@@ -129,8 +128,9 @@ is_anti_symmetric: true
|
|
|
129
128
|
:data => nil,
|
|
130
129
|
:col1_obo => nil,
|
|
131
130
|
:col2_obo => nil,
|
|
132
|
-
:translate_to => :id,
|
|
133
|
-
:output => :cols,
|
|
131
|
+
:translate_to => :id, # also :label
|
|
132
|
+
:output => :cols, # also :xls, :homolonto, :parent_match
|
|
133
|
+
:parent_match_to => :is_a, # only used when :output == :parent_match
|
|
134
134
|
:output_filename => 'foo',
|
|
135
135
|
:index_start => 0
|
|
136
136
|
}.merge!(options)
|
|
@@ -212,6 +212,65 @@ is_anti_symmetric: true
|
|
|
212
212
|
true
|
|
213
213
|
end
|
|
214
214
|
|
|
215
|
+
# Takes a two column input file, references it to two ontologies, and returns a hash
# mapping each first-column value to its second-column value.
#
# Rows are newline separated and columns are tab separated; rows with fewer than
# two columns are skipped. A value containing a ':' is treated as an id and kept
# as is. Any other value is treated as a term label and translated to an id via
# the matching ontology's term_hash. A label that is not found in the ontology is
# kept untranslated so that callers can still report it. When a first-column
# value repeats, the last row wins.
#
#== Example use
#   file = File.read('HAO_TGMA_list.txt')
#   col1_obo = File.read('hao.obo')
#   col2_obo = File.read('tgma.obo')
#
#   OboParser::Utilities.hashify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
#
# @param [Hash] options options.
# @option options [String] :data the two column data; a missing value is treated as no rows
# @option options [String] :col1_obo the OBO file (as passed to parse_obo_file) referenced in the first column
# @option options [String] :col2_obo the OBO file (as passed to parse_obo_file) referenced in the second column
# @return [Hash] a hash of {id string => id string}
def self.hashify_pairs(options = {})
  opt = {
    :data => nil,
    :col1_obo => nil,
    :col2_obo => nil
  }.merge!(options)

  c1obo = parse_obo_file(opt[:col1_obo])
  c2obo = parse_obo_file(opt[:col2_obo])

  # Build each label => id lookup once, not once per row.
  col1_ids = c1obo.term_hash
  col2_ids = c2obo.term_hash

  hash = {}

  opt[:data].to_s.split(/\n/).each do |row|
    c1, c2 = row.split(/\t/).map(&:strip)
    next if c1.nil? || c2.nil?

    # Anything with a ':' is already an id; otherwise translate the label,
    # falling back to the raw value when the ontology does not know it.
    v1 = c1.include?(':') ? c1 : (col1_ids[c1] || c1)
    v2 = c2.include?(':') ? c2 : (col2_ids[c2] || c2)

    hash[v1] = v2
  end

  hash
end
|
|
272
|
+
|
|
273
|
+
|
|
215
274
|
# Returns a HomolOnto Stanza
|
|
216
275
|
#
|
|
217
276
|
# @param [String] id an externally tracked id for the id: tag like '00001'
|
|
@@ -230,6 +289,143 @@ is_anti_symmetric: true
|
|
|
230
289
|
s.join("\n")
|
|
231
290
|
end
|
|
232
291
|
|
|
292
|
+
|
|
293
|
+
# Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files
# (a network, and node properties) for Cytoscape.
#
# Two files are written to the current working directory, replacing any existing content:
# * nodes.tab - one line per term: id, name, ontology key (tab separated)
# * edges.eda - one line per matching relationship: id, "(relation)", related id (tab separated)
#
# Nothing is written when either option is empty. Both files are opened in block
# form so they are closed even if parsing or writing raises.
#
#== Example use
#   OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
#
# @param [Hash] options options.
# @option options [Hash] :ontologies a Hash of #read files as values, keys as working names
# @option options [Array] :properties an Array of properties like ['is_a', 'part_of']
# @return [Boolean] false when :properties or :ontologies is empty, true once the files are written
def self.cytoscapify(options = {})
  opt = {
    :ontologies => {},
    :properties => []
  }.merge!(options)

  return false if opt[:properties].empty?
  return false if opt[:ontologies].empty?

  File.open("nodes.tab", "w+") do |nodes|
    File.open("edges.eda", "w+") do |edges|
      opt[:ontologies].each do |k, source|
        obo_file = parse_obo_file(source)

        obo_file.terms.each do |t|
          nodes.puts([t.id.value, t.name.value, k].join("\t"))

          t.relationships.each do |rel, id|
            # Only relationships of the requested types become edges.
            edges.puts([t.id.value, "(#{rel})", id].join("\t")) if opt[:properties].include?(rel)
          end
        end
      end
    end
  end

  true
end
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
# Takes a two column input file, references it to two ontologies, and returns a report
# that identifies data pairs that have parents who are also a data pair given a
# provided property/relation type.
#
# Each data pair "a -> b" is reported exactly once:
# * :yes      - some parent of a (via the property) is paired in the data with some parent of b
# * :no       - both a and b have parents via the property, but no two of them are a data pair
# * :unplaced - a or b has no parent via the property, so the pair can not be checked
#
# Pairs whose ids are not present in the corresponding ontology are skipped and a
# "can't find" message naming the missing id is printed.
#
#== Example use
#   file = File.read('HAO_TGMA_list.txt')
#   col1_obo = File.read('hao.obo')
#   col2_obo = File.read('tgma.obo')
#
#   foo = OboParser::Utilities.parents(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
#
#   puts "-- NO (#{foo[:no].size})\n"
#   puts foo[:no].join("\n")
#   puts "-- YES (#{foo[:yes].size})\n"
#   puts foo[:yes].join("\n")
#
# @param [Hash] options options.
# @option options [String] :data the two column data file.
# @option options [String] :col1_obo the OBO file referenced in the first column
# @option options [String] :col2_obo the OBO file referenced in the second column
# @option options [String] :property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
# @return [Hash, false] a hash of {:yes => [], :no => [], :unplaced => []} holding "a -> b" strings,
#   or false when no :property is given
def self.parents(options = {})
  opt = {
    :data => nil,
    :col1_obo => nil,
    :col2_obo => nil,
    :property => nil
  }.merge!(options)

  return false if opt[:property].nil?

  c1obo = parse_obo_file(opt[:col1_obo])
  c2obo = parse_obo_file(opt[:col2_obo])

  result = {:yes => [], :no => [], :unplaced => []}
  hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])

  obo1_hash = c1obo.id_index
  obo2_hash = c2obo.id_index

  # Ids of the terms related to +term+ through the requested property.
  parent_ids = lambda do |term|
    term.relationships.select { |rel, _id| rel == opt[:property] }.map { |_rel, id| id }
  end

  hash.each do |a, b|
    unless obo1_hash[a]
      puts "can't find #{a}\n"
      next
    end

    unless obo2_hash[b]
      puts "can't find #{b}\n"
      next
    end

    ids_1 = parent_ids.call(obo1_hash[a])
    ids_2 = parent_ids.call(obo2_hash[b])

    t = "#{a} -> #{b}"

    if ids_1.empty? || ids_2.empty?
      result[:unplaced].push(t)
    elsif ids_1.any? { |c| hash.key?(c) && ids_2.include?(hash[c]) }
      # One hit is enough; the pair is recorded a single time.
      result[:yes].push(t)
    else
      result[:no].push(t)
    end
  end

  result
end
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
|
|
233
429
|
#== Helper methods that don't require the obo_parser library
|
|
234
430
|
|
|
235
431
|
# Given a Term id and a String representing an OBO file returns that stanza.
|
data/obo_parser.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{obo_parser}
|
|
8
|
-
s.version = "0.3.
|
|
8
|
+
s.version = "0.3.6"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["mjy"]
|
|
12
|
-
s.date = %q{2011-
|
|
12
|
+
s.date = %q{2011-08-10}
|
|
13
13
|
s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
|
|
14
14
|
s.email = %q{diapriid@gmail.com}
|
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: obo_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 31
|
|
5
5
|
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 0
|
|
8
8
|
- 3
|
|
9
|
-
-
|
|
10
|
-
version: 0.3.
|
|
9
|
+
- 6
|
|
10
|
+
version: 0.3.6
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- mjy
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2011-
|
|
18
|
+
date: 2011-08-10 00:00:00 Z
|
|
19
19
|
dependencies: []
|
|
20
20
|
|
|
21
21
|
description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "
|