obo_parser 0.3.5 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +6 -4
- data/VERSION +1 -1
- data/lib/obo_parser.rb +16 -4
- data/lib/utilities.rb +200 -4
- data/obo_parser.gemspec +2 -2
- metadata +4 -4
data/README.rdoc
CHANGED
@@ -14,8 +14,10 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
|
|
14
14
|
require 'obo_parser'
|
15
15
|
foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance
|
16
16
|
first_term = foo.terms.first # => An OboParser#Term instance
|
17
|
-
|
17
|
+
|
18
|
+
first_term.id.value # => 'HAO:1234'
|
18
19
|
|
20
|
+
d = first_term.def # => An OboParser#Tag instance
|
19
21
|
d.tag # => 'def'
|
20
22
|
d.value # => 'Some definition'
|
21
23
|
d.xrefs # => ['xref:123', 'xref:456']
|
@@ -24,7 +26,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
|
|
24
26
|
t = first_term.name # => An OboParser#Tag instance
|
25
27
|
t.tag # => 'name'
|
26
28
|
t.value # => 'Some Term name'
|
27
|
-
|
29
|
+
|
28
30
|
o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
|
29
31
|
o.first # => An OboParser#Tag instance
|
30
32
|
|
@@ -36,7 +38,7 @@ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for
|
|
36
38
|
foo.terms.first.tags_named('synonym').first.tag # => 'synonym'
|
37
39
|
foo.terms.first.tags_named('synonym').first.value # => 'Some label'
|
38
40
|
|
39
|
-
foo.terms.first.relationships # => [['
|
41
|
+
foo.terms.first.relationships # => [['relationship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
|
40
42
|
|
41
43
|
=== Convenience methods
|
42
44
|
|
@@ -47,7 +49,7 @@ See also /test/test_obo_parser.rb
|
|
47
49
|
|
48
50
|
== Utilities
|
49
51
|
|
50
|
-
A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example
|
52
|
+
A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example: 1) shared labels across sets of ontologies can be found and returned; 2) ontologies can be dumped into a simple Cytoscape node/edge format; 3) given a set of correspondences between two ontologies, various reports can be made.
|
51
53
|
|
52
54
|
== Documentation
|
53
55
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.5
|
1
|
+
0.3.6
|
data/lib/obo_parser.rb
CHANGED
@@ -26,14 +26,25 @@ module OboParser
|
|
26
26
|
end
|
27
27
|
|
28
28
|
# Warning! This assumes terms are unique, they are NOT required to be so in an OBO file.
|
29
|
+
# Ignores hash collisions!! (when two terms share a name, the later one overwrites the earlier)
|
29
30
|
def term_hash # :yields: Hash (String => String) (name => id)
|
30
31
|
@terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
|
31
32
|
end
|
32
33
|
|
33
|
-
|
34
|
+
# Returns a hash of 'id:012345' => 'term label'
|
35
|
+
#
|
36
|
+
# @return [Hash] a hash of {id => string} for the file
|
37
|
+
def id_hash
|
34
38
|
@terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
|
35
39
|
end
|
36
40
|
|
41
|
+
# Returns a hash of 'id:012345' => Term
|
42
|
+
#
|
43
|
+
# @return [Hash] a hash of {id => Term} for the file
|
44
|
+
def id_index
|
45
|
+
@terms.inject({}) {|sum, t| sum.update(t.id.value => t)}
|
46
|
+
end
|
47
|
+
|
37
48
|
# A single line in a Stanza within an OBO file
|
38
49
|
class Tag
|
39
50
|
attr_accessor :tag, :value, :xrefs, :comment, :qualifier, :related_term, :relation
|
@@ -108,6 +119,10 @@ module OboParser
|
|
108
119
|
end
|
109
120
|
@other_tags = anonymous_tags
|
110
121
|
end
|
122
|
+
|
123
|
+
#def relationships_of_type(reltype = nil)
|
124
|
+
# return [] if reltype.nil?
|
125
|
+
#end
|
111
126
|
end
|
112
127
|
|
113
128
|
class Typedef < Stanza
|
@@ -160,9 +175,6 @@ def parse_obo_file(input)
|
|
160
175
|
@input = input
|
161
176
|
raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
|
162
177
|
|
163
|
-
# Comments are handled now.
|
164
|
-
# @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
165
|
-
|
166
178
|
builder = OboParser::OboParserBuilder.new
|
167
179
|
lexer = OboParser::Lexer.new(@input)
|
168
180
|
OboParser::Parser.new(lexer, builder).parse_file
|
data/lib/utilities.rb
CHANGED
@@ -81,7 +81,7 @@ module OboParser::Utilities
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
puts
|
84
|
+
puts match.sort.join("\n")
|
85
85
|
puts "\n#{match.length} total."
|
86
86
|
|
87
87
|
end
|
@@ -112,7 +112,6 @@ is_anti_symmetric: true
|
|
112
112
|
# file = File.read('HAO_TGMA_list.txt')
|
113
113
|
# col1_obo = File.read('hao.obo')
|
114
114
|
# col2_obo = File.read('tgma.obo')
|
115
|
-
# column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
|
116
115
|
#
|
117
116
|
# OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
|
118
117
|
#== Output types
|
@@ -129,8 +128,9 @@ is_anti_symmetric: true
|
|
129
128
|
:data => nil,
|
130
129
|
:col1_obo => nil,
|
131
130
|
:col2_obo => nil,
|
132
|
-
:translate_to => :id,
|
133
|
-
:output => :cols,
|
131
|
+
:translate_to => :id, # also :label
|
132
|
+
:output => :cols, # also :xls, :homolonto, :parent_match
|
133
|
+
:parent_match_to => :is_a, # only used when :output == :parent_match
|
134
134
|
:output_filename => 'foo',
|
135
135
|
:index_start => 0
|
136
136
|
}.merge!(options)
|
@@ -212,6 +212,65 @@ is_anti_symmetric: true
|
|
212
212
|
true
|
213
213
|
end
|
214
214
|
|
215
|
+
# Takes a two column input file, references it to two ontologies, and returns a hash
|
216
|
+
#
|
217
|
+
#== Example use
|
218
|
+
# file = File.read('HAO_TGMA_list.txt')
|
219
|
+
# col1_obo = File.read('hao.obo')
|
220
|
+
# col2_obo = File.read('tgma.obo')
|
221
|
+
#
|
222
|
+
# OboParser::Utilities.hashify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
|
223
|
+
#
|
224
|
+
# @param [Hash] options options.
|
225
|
+
# @param [Symbol] data the two column data file.
|
226
|
+
# @param [Symbol] col1_obo the OBO file referenced in the first column
|
227
|
+
# @param [Symbol] col2_obo the OBO file referenced in the second column
|
228
|
+
# @return [Hash] a hash of {id string => id string}
|
229
|
+
def self.hashify_pairs(options = {})
|
230
|
+
opt = {
|
231
|
+
:data => nil,
|
232
|
+
:col1_obo => nil,
|
233
|
+
:col2_obo => nil,
|
234
|
+
}.merge!(options)
|
235
|
+
|
236
|
+
c1obo = parse_obo_file(opt[:col1_obo])
|
237
|
+
c2obo = parse_obo_file(opt[:col2_obo])
|
238
|
+
|
239
|
+
hash = Hash.new
|
240
|
+
|
241
|
+
i = opt[:index_start]
|
242
|
+
v1 = nil # a label like 'head'
|
243
|
+
v2 = nil
|
244
|
+
c1 = nil # an id 'FOO:123'
|
245
|
+
c2 = nil
|
246
|
+
|
247
|
+
opt[:data].split(/\n/).each do |row|
|
248
|
+
i += 1
|
249
|
+
c1, c2 = row.split(/\t/).map(&:strip)
|
250
|
+
|
251
|
+
if c1.nil? || c2.nil?
|
252
|
+
next
|
253
|
+
end
|
254
|
+
|
255
|
+
# the conversion
|
256
|
+
if c1 =~ /.*\:.*/ # it's an id, leave it
|
257
|
+
v1 = c1
|
258
|
+
else
|
259
|
+
v1 = c1obo.term_hash[c1]
|
260
|
+
end
|
261
|
+
if c2 =~ /.*\:.*/
|
262
|
+
v2 = c2
|
263
|
+
else
|
264
|
+
v2 = c2obo.term_hash[c2]
|
265
|
+
end
|
266
|
+
|
267
|
+
hash.merge!(c1 => c2)
|
268
|
+
|
269
|
+
end
|
270
|
+
return hash
|
271
|
+
end
|
272
|
+
|
273
|
+
|
215
274
|
# Returns a HomolOnto Stanza
|
216
275
|
#
|
217
276
|
# @param [String] id an externally tracked id for the id: tag like '00001'
|
@@ -230,6 +289,143 @@ is_anti_symmetric: true
|
|
230
289
|
s.join("\n")
|
231
290
|
end
|
232
291
|
|
292
|
+
|
293
|
+
# Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
|
294
|
+
#
|
295
|
+
#== Example use
|
296
|
+
# OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
|
297
|
+
#
|
298
|
+
# @param [Symbol] ontologies a Hash of #read files as values, keys as working names
|
299
|
+
# @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
|
300
|
+
# TODO: @return File1, File2, Filen
|
301
|
+
def self.cytoscapify(options = {})
|
302
|
+
opt = {
|
303
|
+
:ontologies => {},
|
304
|
+
:properties => []
|
305
|
+
}.merge!(options)
|
306
|
+
|
307
|
+
return false if opt[:properties].empty?
|
308
|
+
return false if opt[:ontologies].empty?
|
309
|
+
|
310
|
+
nodes = File.new("nodes.tab", "w+")
|
311
|
+
edges = File.new("edges.eda", "w+")
|
312
|
+
|
313
|
+
opt[:ontologies].keys.each do |k|
|
314
|
+
|
315
|
+
obo_file = parse_obo_file(opt[:ontologies][k])
|
316
|
+
|
317
|
+
obo_file.terms.each do |t|
|
318
|
+
nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
|
319
|
+
|
320
|
+
t.relationships.each do |rel, id|
|
321
|
+
edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
|
322
|
+
end
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
nodes.close
|
327
|
+
edges.close
|
328
|
+
|
329
|
+
true
|
330
|
+
|
331
|
+
end
|
332
|
+
|
333
|
+
|
334
|
+
# Takes a two column input file, references it to two ontologies, and returns a report
|
335
|
+
# that identifies data pairs that have parents who are also a data pair given a
|
336
|
+
# provided property/relation type.
|
337
|
+
#
|
338
|
+
#== Example use
|
339
|
+
# file = File.read('HAO_TGMA_list.txt')
|
340
|
+
# col1_obo = File.read('hao.obo')
|
341
|
+
# col2_obo = File.read('tgma.obo')
|
342
|
+
#
|
343
|
+
# foo = OboParser::Utilities.parents(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
|
344
|
+
#
|
345
|
+
# puts "-- NO (#{foo[:no].size})\n"
|
346
|
+
# puts foo[:no].join("\n")
|
347
|
+
# puts "-- YES (#{foo[:yes].size})\n"
|
348
|
+
# puts foo[:yes].join("\n")
|
349
|
+
#
|
350
|
+
# @param [Hash] options options.
|
351
|
+
# @param [Symbol] data the two column data file.
|
352
|
+
# @param [Symbol] col1_obo the OBO file referenced in the first column
|
353
|
+
# @param [Symbol] col2_obo the OBO file referenced in the second column
|
354
|
+
# @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
|
355
|
+
# @return [Hash, false] a hash of {:yes => [], :no => [], :unplaced => []}, or false when :property is nil
|
356
|
+
def self.parents(options = {})
|
357
|
+
opt = {
|
358
|
+
:data => nil,
|
359
|
+
:col1_obo => nil,
|
360
|
+
:col2_obo => nil,
|
361
|
+
:property => nil
|
362
|
+
}.merge!(options)
|
363
|
+
|
364
|
+
return false if opt[:property].nil?
|
365
|
+
c1obo = parse_obo_file(opt[:col1_obo])
|
366
|
+
c2obo = parse_obo_file(opt[:col2_obo])
|
367
|
+
|
368
|
+
result = {:yes => [], :no => [], :unplaced => []}
|
369
|
+
# update
|
370
|
+
hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
371
|
+
|
372
|
+
obo1_hash = c1obo.id_index
|
373
|
+
obo2_hash = c2obo.id_index
|
374
|
+
|
375
|
+
hash.keys.each do |k|
|
376
|
+
a = k
|
377
|
+
b = hash[a]
|
378
|
+
|
379
|
+
ids_1 = []
|
380
|
+
ids_2 = []
|
381
|
+
|
382
|
+
if !obo1_hash[a]
|
383
|
+
puts "can't find #{k}\n"
|
384
|
+
next
|
385
|
+
end
|
386
|
+
|
387
|
+
if !obo2_hash[b]
|
388
|
+
puts "can't find #{k}\n"
|
389
|
+
next
|
390
|
+
end
|
391
|
+
|
392
|
+
obo1_hash[a].relationships.each do |rel, id|
|
393
|
+
if rel == opt[:property]
|
394
|
+
ids_1.push id
|
395
|
+
end
|
396
|
+
end
|
397
|
+
|
398
|
+
obo2_hash[b].relationships.each do |rel, id|
|
399
|
+
if rel == opt[:property]
|
400
|
+
ids_2.push id
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
404
|
+
unplaced = true
|
405
|
+
|
406
|
+
ids_1.each do |c|
|
407
|
+
ids_2.each do |d|
|
408
|
+
t = "#{a} -> #{b}"
|
409
|
+
if hash[c] == d
|
410
|
+
result[:yes].push(t)
|
411
|
+
unplaced = false
|
412
|
+
next # don't add again after we find a hit
|
413
|
+
else
|
414
|
+
result[:no].push(t)
|
415
|
+
unplaced = false
|
416
|
+
end
|
417
|
+
end
|
418
|
+
end
|
419
|
+
result[:unplaced]
|
420
|
+
|
421
|
+
end
|
422
|
+
|
423
|
+
result
|
424
|
+
end
|
425
|
+
|
426
|
+
|
427
|
+
|
428
|
+
|
233
429
|
#== Helper methods that don't require the obo_parser library
|
234
430
|
|
235
431
|
# Given a Term id and a String representing an OBO file returns that stanza.
|
data/obo_parser.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{obo_parser}
|
8
|
-
s.version = "0.3.5"
|
8
|
+
s.version = "0.3.6"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-08-10}
|
13
13
|
s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
|
14
14
|
s.email = %q{diapriid@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: obo_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 5
|
10
|
-
version: 0.3.5
|
9
|
+
- 6
|
10
|
+
version: 0.3.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- mjy
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-08-10 00:00:00 Z
|
19
19
|
dependencies: []
|
20
20
|
|
21
21
|
description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "
|