obo_parser 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README.rdoc +37 -9
- data/VERSION +1 -1
- data/lib/lexer.rb +1 -0
- data/lib/obo_parser.rb +119 -73
- data/lib/parser.rb +13 -10
- data/lib/tokens.rb +94 -83
- data/lib/utilities.rb +8 -19
- data/obo_parser.gemspec +4 -7
- data/test/cell.obo +1 -0
- data/test/test_obo_parser.rb +36 -12
- metadata +11 -8
- data/README +0 -13
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,16 +1,44 @@
|
|
1
1
|
= obo_parser
|
2
2
|
|
3
|
-
A simple
|
3
|
+
A simple Ruby gem for parsing OBO formatted ontology files. Useful for reporting, comparing, and mapping data to other databases. There is presently no functionality for logical inference across the ontology.
|
4
4
|
|
5
|
-
==
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
gem install obo_parser
|
8
|
+
|
9
|
+
== Use
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'obo_parser'
|
13
|
+
foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance. Targets OBO v. 1.4
|
14
|
+
first_term = foo.terms.first # => An OboParser#Term instance
|
15
|
+
d = first_term.def # => An OboParser#Tag instance
|
6
16
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
17
|
+
d.tag # => 'def'
|
18
|
+
d.value # => 'Some definition'
|
19
|
+
d.xrefs # => ['xref:123', 'xref:456']
|
20
|
+
d.comment # => 'Some comment'
|
21
|
+
|
22
|
+
t = first_term.name # => An OboParser#Tag instance
|
23
|
+
t.tag # => 'name'
|
24
|
+
t.value # => 'Some Term name'
|
25
|
+
|
26
|
+
o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
|
27
|
+
o.first # => An OboParser#Tag instance
|
28
|
+
|
29
|
+
first_typedef = foo.typedefs.first # => An OboParser#Typedef instance
|
30
|
+
first_typedef.id.value # => 'Some typedef id'
|
31
|
+
first_typedef.name.value # => 'Some typedef name'
|
32
|
+
|
33
|
+
foo.terms.first.tags_named('is_a') # => [OboParser#Tag, ... ]
|
34
|
+
foo.terms.first.tags_named('is_a').first.tag # => 'is_a'
|
35
|
+
foo.terms.first.tags_named('is_a').first.value # => 'Some Term id'
|
36
|
+
|
37
|
+
See also /test/test_obo_parser.rb
|
38
|
+
|
39
|
+
== Utilities
|
40
|
+
|
41
|
+
A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in utilities.rb. See /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
|
14
42
|
|
15
43
|
== Copyright
|
16
44
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/lib/lexer.rb
CHANGED
data/lib/obo_parser.rb
CHANGED
@@ -2,113 +2,159 @@
|
|
2
2
|
# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
|
3
3
|
# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
|
4
4
|
|
5
|
-
|
5
|
+
#== Outstanding issues:
|
6
|
+
# * Better documentation
|
7
|
+
# * More tests
|
6
8
|
|
7
9
|
module OboParser
|
8
10
|
|
9
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
10
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
11
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
12
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
|
11
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
14
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
|
13
15
|
|
16
|
+
class OboParser
|
17
|
+
attr_accessor :terms, :typedefs
|
14
18
|
|
15
|
-
|
16
|
-
|
19
|
+
def initialize
|
20
|
+
@terms = []
|
21
|
+
@typedefs = []
|
22
|
+
true
|
23
|
+
end
|
17
24
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
true
|
22
|
-
end
|
25
|
+
def term_strings # :yields: Array of Strings
|
26
|
+
@terms.collect{|t| t.name.value}.sort
|
27
|
+
end
|
23
28
|
|
24
|
-
|
25
|
-
|
26
|
-
|
29
|
+
# Warning! This assumes terms are unique, they are NOT required to be so in an OBO file.
|
30
|
+
def term_hash # :yields: Hash (String => String) (name => id)
|
31
|
+
@terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
|
32
|
+
end
|
27
33
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
34
|
+
def id_hash # :yields: Hash (String => String (id => name))
|
35
|
+
@terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
|
36
|
+
end
|
32
37
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
38
|
+
class Stanza
|
39
|
+
# Make special reference to several specific types of tags (:name, :id); subclasses will remove additional special types from :other_tags
|
40
|
+
attr_accessor :name, :id, :other_tags
|
41
|
+
|
42
|
+
def initialize(tags)
|
43
|
+
@other_tags = []
|
44
|
+
|
45
|
+
while tags.length != 0
|
46
|
+
t = tags.shift
|
47
|
+
|
48
|
+
new_tag = OboParser::Tag.new
|
49
|
+
new_tag.tag = t.tag
|
50
|
+
new_tag.value = t.value
|
51
|
+
new_tag.comment = t.comment
|
52
|
+
new_tag.xrefs = t.xrefs
|
53
|
+
|
54
|
+
case new_tag.tag
|
55
|
+
when 'id'
|
56
|
+
@id = new_tag
|
57
|
+
when 'name'
|
58
|
+
@name = new_tag
|
59
|
+
else
|
60
|
+
@other_tags.push(new_tag)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
37
64
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
tags.each do |t|
|
46
|
-
case t[0]
|
47
|
-
when 'id'
|
48
|
-
@id = t[1]
|
49
|
-
when 'name'
|
50
|
-
@name = t[1]
|
51
|
-
else
|
52
|
-
@tags[t[0]] = [] if !@tags[t[0]]
|
53
|
-
@tags[t[0]].push t[1]
|
65
|
+
#=== Convenience methods
|
66
|
+
|
67
|
+
def tags_named(tag_name = nil)
|
68
|
+
return nil if tag_name.nil?
|
69
|
+
result = []
|
70
|
+
@other_tags.each do |t|
|
71
|
+
result.push t if t.tag == tag_name
|
54
72
|
end
|
73
|
+
result
|
55
74
|
end
|
75
|
+
|
56
76
|
end
|
57
|
-
end
|
58
77
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
78
|
+
class Term < Stanza
|
79
|
+
attr_accessor :def
|
80
|
+
def initialize(tags)
|
81
|
+
super
|
82
|
+
|
83
|
+
anonymous_tags = []
|
84
|
+
|
85
|
+
# Loop through "unclaimed" tags and reference those specific to Term
|
86
|
+
while @other_tags.size != 0
|
87
|
+
t = @other_tags.shift
|
88
|
+
case t.tag
|
89
|
+
when 'def'
|
90
|
+
@def = t
|
91
|
+
else
|
92
|
+
anonymous_tags.push(t)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
@other_tags = anonymous_tags
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
class Typedef < Stanza
|
100
|
+
def initialize(tags)
|
101
|
+
super
|
102
|
+
#anonymous_tags = []
|
103
|
+
## Loop through "unclaimed" tags and reference those specific to Typedef
|
104
|
+
#while @other_tags.size != 0
|
105
|
+
# t = @other_tags.shift
|
106
|
+
# case t.tag
|
107
|
+
# when 'def'
|
108
|
+
# @def = t
|
109
|
+
# else
|
110
|
+
# anonymous_tags.push(t)
|
111
|
+
# end
|
112
|
+
# @other_tags = anonymous_tags
|
113
|
+
#end
|
114
|
+
end
|
63
115
|
end
|
64
|
-
end
|
65
116
|
|
66
|
-
|
67
|
-
|
68
|
-
super
|
117
|
+
class Tag
|
118
|
+
attr_accessor :tag, :value, :xrefs, :comment
|
69
119
|
end
|
120
|
+
|
70
121
|
end
|
71
122
|
|
72
|
-
|
123
|
+
class OboParserBuilder
|
124
|
+
def initialize
|
125
|
+
@of = OboParser.new
|
126
|
+
end
|
73
127
|
|
128
|
+
def add_term(tags)
|
129
|
+
@of.terms.push OboParser::Term.new(tags)
|
130
|
+
end
|
74
131
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
end
|
132
|
+
def add_typedef(tags)
|
133
|
+
@of.typedefs.push OboParser::Typedef.new(tags)
|
134
|
+
end
|
79
135
|
|
80
|
-
|
81
|
-
|
82
|
-
|
136
|
+
def obo_file
|
137
|
+
@of
|
138
|
+
end
|
83
139
|
|
84
|
-
def add_typedef(tags)
|
85
|
-
@of.typedefs.push OboParser::Typedef.new(tags)
|
86
140
|
end
|
87
141
|
|
88
|
-
|
89
|
-
@of
|
142
|
+
class ParseError < StandardError
|
90
143
|
end
|
91
144
|
|
92
|
-
end
|
93
|
-
|
94
|
-
class ParseError < StandardError
|
95
|
-
end
|
96
|
-
|
97
145
|
end # end module
|
98
146
|
|
99
|
-
|
147
|
+
#= Implementation
|
148
|
+
|
100
149
|
def parse_obo_file(input)
|
101
150
|
@input = input
|
102
|
-
|
151
|
+
raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
|
152
|
+
|
153
|
+
# Comments are handled now.
|
154
|
+
# @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
103
155
|
|
104
|
-
@input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
105
|
-
|
106
156
|
builder = OboParser::OboParserBuilder.new
|
107
157
|
lexer = OboParser::Lexer.new(@input)
|
108
158
|
OboParser::Parser.new(lexer, builder).parse_file
|
109
159
|
return builder.obo_file
|
110
160
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
data/lib/parser.rb
CHANGED
@@ -5,21 +5,21 @@ class OboParser::Parser
|
|
5
5
|
end
|
6
6
|
|
7
7
|
def parse_file
|
8
|
-
#
|
8
|
+
# At present we ignore the header lines
|
9
9
|
while !@lexer.peek(OboParser::Tokens::Term)
|
10
10
|
@lexer.pop(OboParser::Tokens::TagValuePair)
|
11
11
|
end
|
12
12
|
|
13
13
|
i = 0
|
14
14
|
while !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
15
|
-
raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000
|
15
|
+
raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000 # there aren't that many words!
|
16
16
|
parse_term
|
17
17
|
i += 1
|
18
18
|
end
|
19
19
|
|
20
20
|
i = 0
|
21
21
|
while @lexer.peek(OboParser::Tokens::Typedef)
|
22
|
-
raise OboParser::ParseError,"infinite loop in
|
22
|
+
raise OboParser::ParseError,"infinite loop in Typedefs" if i > 1000000
|
23
23
|
parse_typedef
|
24
24
|
i += 1
|
25
25
|
end
|
@@ -29,11 +29,11 @@ class OboParser::Parser
|
|
29
29
|
t = @lexer.pop(OboParser::Tokens::Term)
|
30
30
|
tags = []
|
31
31
|
while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
32
|
-
|
32
|
+
begin
|
33
33
|
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
34
|
-
tags.push
|
35
|
-
|
36
|
-
raise
|
34
|
+
tags.push(t)
|
35
|
+
rescue
|
36
|
+
raise
|
37
37
|
end
|
38
38
|
end
|
39
39
|
@builder.add_term(tags)
|
@@ -41,11 +41,14 @@ class OboParser::Parser
|
|
41
41
|
|
42
42
|
def parse_typedef
|
43
43
|
@lexer.pop(OboParser::Tokens::Typedef)
|
44
|
-
# @t = @builder.stub_typdef
|
45
44
|
tags = []
|
46
45
|
while !@lexer.peek(OboParser::Tokens::Typedef) && @lexer.peek(OboParser::Tokens::TagValuePair)
|
47
|
-
|
48
|
-
|
46
|
+
begin
|
47
|
+
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
48
|
+
tags.push(t)
|
49
|
+
rescue
|
50
|
+
raise
|
51
|
+
end
|
49
52
|
end
|
50
53
|
@builder.add_typedef(tags)
|
51
54
|
end
|
data/lib/tokens.rb
CHANGED
@@ -9,9 +9,6 @@ module OboParser::Tokens
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
# in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
|
13
|
-
# moving along popping off
|
14
|
-
|
15
12
|
class Term < Token
|
16
13
|
@regexp = Regexp.new(/\A\s*(\[term\])\s*/i)
|
17
14
|
end
|
@@ -20,19 +17,52 @@ module OboParser::Tokens
|
|
20
17
|
@regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
|
21
18
|
end
|
22
19
|
|
23
|
-
|
24
20
|
class TagValuePair < Token
|
25
|
-
attr_reader :tag, :
|
26
|
-
@regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
|
21
|
+
attr_reader :tag, :comment, :xrefs
|
22
|
+
@regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
|
27
23
|
def initialize(str)
|
28
24
|
str.strip!
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
25
|
+
tag, value = str.split(':',2)
|
26
|
+
|
27
|
+
value.strip!
|
28
|
+
|
29
|
+
# Handle comments
|
30
|
+
if value =~ /(!\s*.+)\Z/i
|
31
|
+
@comment = $1
|
32
|
+
value.gsub!(@comment, '')
|
33
|
+
@comment.gsub!(/\A!\s*/, '')
|
34
|
+
end
|
35
|
+
|
36
|
+
# Break out the xrefs, could be made more robust
|
37
|
+
# Assumes non-quoted comma delimited in format 'foo:bar, stuff:things'
|
38
|
+
if value =~ /(\s*\[.*\]\s*)/i
|
39
|
+
xref_list = $1
|
40
|
+
value.gsub!(xref_list, '')
|
41
|
+
xref_list.strip!
|
42
|
+
xref_list = xref_list[1..-2] # strip []
|
43
|
+
@xrefs = xref_list.split(",")
|
44
|
+
end
|
33
45
|
|
34
|
-
@tag =
|
35
|
-
@value =
|
46
|
+
@tag = tag.strip
|
47
|
+
@value = value.strip
|
48
|
+
|
49
|
+
@value = @value[1..-2] if @value[0..0] == "\"" # get rid of quote marks
|
50
|
+
@value = @value[1..-2] if @value[0..0] == "'" # get rid of quote marks
|
51
|
+
|
52
|
+
@tag = @tag.strip
|
53
|
+
@value = @value.strip
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
class XrefList < Token
|
58
|
+
@regexp = Regexp.new(/\A\s*\[(.+)\]\s*\n*/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
|
59
|
+
def initialize(str)
|
60
|
+
str.strip!
|
61
|
+
@value = {}
|
62
|
+
str.split(",").each do |s|
|
63
|
+
i = s.split(":")
|
64
|
+
@value.merge!(i[0].strip => i[1].strip)
|
65
|
+
end
|
36
66
|
end
|
37
67
|
end
|
38
68
|
|
@@ -56,8 +86,6 @@ module OboParser::Tokens
|
|
56
86
|
end
|
57
87
|
end
|
58
88
|
|
59
|
-
|
60
|
-
|
61
89
|
# note we grab EOL and ; here
|
62
90
|
class ValuePair < Token
|
63
91
|
@regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
|
@@ -71,90 +99,73 @@ module OboParser::Tokens
|
|
71
99
|
end
|
72
100
|
end
|
73
101
|
|
74
|
-
class
|
75
|
-
@regexp = Regexp.new(
|
76
|
-
end
|
77
|
-
|
78
|
-
class RowVec < Token
|
79
|
-
@regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
|
80
|
-
def initialize(str)
|
81
|
-
s = str.split(/\(|\)/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
|
82
|
-
@value = s
|
83
|
-
end
|
102
|
+
class EndOfFile < Token
|
103
|
+
@regexp = Regexp.new('\A(\s*\n*)\Z')
|
84
104
|
end
|
85
105
|
|
86
|
-
|
87
|
-
|
88
106
|
## punctuation
|
89
107
|
|
90
108
|
class LBracket < Token
|
91
109
|
@regexp = Regexp.new('\A\s*(\[)\s*')
|
92
110
|
end
|
93
111
|
|
94
|
-
class
|
95
|
-
|
96
|
-
end
|
97
|
-
|
98
|
-
class
|
99
|
-
|
100
|
-
end
|
101
|
-
|
102
|
-
class RParen < Token
|
103
|
-
|
104
|
-
end
|
112
|
+
#class LParen < Token
|
113
|
+
# @regexp = Regexp.new('\A\s*(\()\s*')
|
114
|
+
#end
|
115
|
+
|
116
|
+
#class RBracket < Token
|
117
|
+
# @regexp = Regexp.new('\A\s*(\])\s*')
|
118
|
+
#end
|
119
|
+
|
120
|
+
#class RParen < Token
|
121
|
+
# @regexp = Regexp.new('\A\s*(\))\s*')
|
122
|
+
#end
|
123
|
+
|
124
|
+
#class Equals < Token
|
125
|
+
# @regexp = Regexp.new('\A\s*(=)\s*')
|
126
|
+
#end
|
127
|
+
|
128
|
+
#class BckSlash < Token
|
129
|
+
# @regexp = Regexp.new('\A\s*(\/)\s*')
|
130
|
+
#end
|
131
|
+
|
132
|
+
#class Colon < Token
|
133
|
+
# @regexp = Regexp.new('\A\s*(:)\s*')
|
134
|
+
#end
|
135
|
+
|
136
|
+
#class SemiColon < Token
|
137
|
+
# @regexp = Regexp.new('\A\s*(;)\s*')
|
138
|
+
#end
|
139
|
+
|
140
|
+
#class Comma < Token
|
141
|
+
# @regexp = Regexp.new('\A\s*(\,)\s*')
|
142
|
+
#end
|
105
143
|
|
106
|
-
class
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
end
|
118
|
-
|
119
|
-
|
120
|
-
@regexp = Regexp.new('\A\s*(;)\s*')
|
121
|
-
end
|
122
|
-
|
123
|
-
class Comma < Token
|
124
|
-
@regexp = Regexp.new('\A\s*(\,)\s*')
|
125
|
-
end
|
126
|
-
|
127
|
-
class EndOfFile < Token
|
128
|
-
@regexp = Regexp.new('\A(\s*\n*)\Z')
|
129
|
-
end
|
130
|
-
|
131
|
-
class Number < Token
|
132
|
-
@regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
|
133
|
-
def initialize(str)
|
134
|
-
# a little oddness here, in some case we don't want to include the .0
|
135
|
-
# see issues with numbers as labels
|
136
|
-
if str =~ /\./
|
137
|
-
@value = str.to_f
|
138
|
-
else
|
139
|
-
@value = str.to_i
|
140
|
-
end
|
141
|
-
|
142
|
-
end
|
143
|
-
end
|
144
|
-
|
145
|
-
# Tokens::NexusComment
|
146
|
-
|
147
|
-
# this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
|
144
|
+
#class Number < Token
|
145
|
+
# @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
|
146
|
+
# def initialize(str)
|
147
|
+
# # a little oddness here, in some case we don't want to include the .0
|
148
|
+
# # see issues with numbers as labels
|
149
|
+
# if str =~ /\./
|
150
|
+
# @value = str.to_f
|
151
|
+
# else
|
152
|
+
# @value = str.to_i
|
153
|
+
# end
|
154
|
+
# end
|
155
|
+
#end
|
156
|
+
|
157
|
+
# This list defines inclusion and priority, i.e. if tokens have overlap then the earlier indexed token will match first
|
148
158
|
def self.obo_file_token_list
|
149
159
|
[
|
150
160
|
OboParser::Tokens::Term,
|
151
161
|
OboParser::Tokens::Typedef,
|
152
|
-
OboParser::Tokens::TagValuePair,
|
153
|
-
OboParser::Tokens::NameValuePair, # not implemented
|
154
|
-
OboParser::Tokens::Dbxref, # not implemented
|
155
162
|
OboParser::Tokens::LBracket,
|
163
|
+
OboParser::Tokens::TagValuePair,
|
164
|
+
OboParser::Tokens::XrefList,
|
156
165
|
OboParser::Tokens::EndOfFile
|
166
|
+
# OboParser::Tokens::NameValuePair, # not implemented
|
167
|
+
# OboParser::Tokens::Dbxref, # not implemented
|
157
168
|
]
|
158
169
|
end
|
159
|
-
|
170
|
+
|
160
171
|
end
|
data/lib/utilities.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'ruby-debug'
|
3
|
-
require 'obo_parser'
|
3
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'obo_parser'))
|
4
4
|
|
5
5
|
module OboParser::Utilities
|
6
6
|
|
@@ -11,7 +11,6 @@ module OboParser::Utilities
|
|
11
11
|
# of4 = File.read('hao4.obo')
|
12
12
|
#
|
13
13
|
# OboParser::Utilities::dump_comparison_by_id([of1, of2, of3, of4])
|
14
|
-
|
15
14
|
def self.dump_comparison_by_id(files = []) # :yields: String
|
16
15
|
of = []
|
17
16
|
files.each_with_index do |f, i|
|
@@ -39,10 +38,9 @@ module OboParser::Utilities
|
|
39
38
|
end
|
40
39
|
end
|
41
40
|
|
41
|
+
# infile is a tab delimited 2 column file that contains IDs in the form FOO_1234
|
42
|
+
# The file is replicated to STDOUT replacing the ID with the Term
|
42
43
|
def self.alignment_translate(infile = nil) # :yields: String
|
43
|
-
# infile is a tab delimited 2 column file that contains IDs in the from FOO_1234
|
44
|
-
# The file is replicated to STDOUT replacing the ID with the Term
|
45
|
-
|
46
44
|
agreement = ARGV[0]
|
47
45
|
raise "Provide a file with comparison." if agreement.nil?
|
48
46
|
comparison = File.read(agreement)
|
@@ -70,22 +68,15 @@ module OboParser::Utilities
|
|
70
68
|
"\t" +
|
71
69
|
(identifiers[v2].nil? ? 'NOT FOUND' : identifiers[v2])
|
72
70
|
end
|
73
|
-
|
74
71
|
end
|
75
72
|
|
76
|
-
|
77
|
-
def self.shared_labels(files = []) # :yields: String
|
78
|
-
|
79
73
|
# Returns labels found in all passed ontologies
|
80
|
-
|
81
74
|
# Usage:
|
82
|
-
|
83
|
-
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
|
87
|
-
# shared_labels([of1, of6])
|
88
|
-
|
75
|
+
# of1 = File.read('fly_anatomy.obo')
|
76
|
+
# of2 = File.read('hao.obo')
|
77
|
+
# of3 = File.read('mosquito_anatomy.obo')
|
78
|
+
# shared_labels([of1, of2, of3])
|
79
|
+
def self.shared_labels(files = []) # :yields: String
|
89
80
|
comparison = {}
|
90
81
|
|
91
82
|
files.each do |f|
|
@@ -109,10 +100,8 @@ module OboParser::Utilities
|
|
109
100
|
end
|
110
101
|
|
111
102
|
puts match.sort.join("\n")
|
112
|
-
|
113
103
|
puts "\n#{match.length} total."
|
114
104
|
|
115
105
|
end
|
116
|
-
|
117
106
|
|
118
107
|
end
|
data/obo_parser.gemspec
CHANGED
@@ -5,23 +5,21 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{obo_parser}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-04-05}
|
13
13
|
s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
|
14
14
|
s.email = %q{diapriid@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE",
|
17
|
-
"README",
|
18
17
|
"README.rdoc"
|
19
18
|
]
|
20
19
|
s.files = [
|
21
20
|
".document",
|
22
21
|
".gitignore",
|
23
22
|
"LICENSE",
|
24
|
-
"README",
|
25
23
|
"README.rdoc",
|
26
24
|
"Rakefile",
|
27
25
|
"VERSION",
|
@@ -43,17 +41,16 @@ Gem::Specification.new do |s|
|
|
43
41
|
s.homepage = %q{http://github.com/mjy/obo_parser}
|
44
42
|
s.rdoc_options = ["--charset=UTF-8"]
|
45
43
|
s.require_paths = ["lib"]
|
46
|
-
s.rubygems_version = %q{1.3
|
44
|
+
s.rubygems_version = %q{1.5.3}
|
47
45
|
s.summary = %q{A simple OBO file handler.}
|
48
46
|
s.test_files = [
|
49
47
|
"test/test_obo_parser.rb"
|
50
48
|
]
|
51
49
|
|
52
50
|
if s.respond_to? :specification_version then
|
53
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
54
51
|
s.specification_version = 3
|
55
52
|
|
56
|
-
if Gem::Version.new(Gem::
|
53
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
57
54
|
else
|
58
55
|
end
|
59
56
|
else
|
data/test/cell.obo
CHANGED
data/test/test_obo_parser.rb
CHANGED
@@ -2,7 +2,7 @@ require 'test/unit'
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'ruby-debug'
|
4
4
|
|
5
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
|
5
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
|
6
6
|
|
7
7
|
class OboParserTest < Test::Unit::TestCase
|
8
8
|
def test_truth
|
@@ -16,15 +16,12 @@ class Test_OboParserBuilder < Test::Unit::TestCase
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
|
20
19
|
class Test_Regex < Test::Unit::TestCase
|
21
20
|
|
22
|
-
def
|
23
|
-
|
24
|
-
txt = "line without note\nBegin taxa; ! comment\n! not this line\n'this ok!'\n\"this too!!\""
|
25
|
-
r2 = Regexp.new(/(\s*?![^!'"]*?\n)/i)
|
26
|
-
assert_equal "line without note\nBegin taxa;\n\n'this ok!'\n\"this too!!\"" , txt.gsub(r2, "\n")
|
21
|
+
def test_some_regex
|
22
|
+
assert true
|
27
23
|
end
|
24
|
+
|
28
25
|
end
|
29
26
|
|
30
27
|
class Test_Lexer < Test::Unit::TestCase
|
@@ -60,7 +57,8 @@ class Test_Lexer < Test::Unit::TestCase
|
|
60
57
|
|
61
58
|
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
62
59
|
assert_equal 'def', t.tag
|
63
|
-
assert_equal '
|
60
|
+
assert_equal 'A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra.', t.value
|
61
|
+
assert_equal(['PATOC:cjm'], t.xrefs)
|
64
62
|
|
65
63
|
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
66
64
|
assert_equal 'subset', t.tag
|
@@ -76,10 +74,27 @@ class Test_Lexer < Test::Unit::TestCase
|
|
76
74
|
assert lexer.pop(OboParser::Tokens::Term)
|
77
75
|
end
|
78
76
|
|
77
|
+
def test_xref_list
|
78
|
+
lexer = OboParser::Lexer.new("[foo:bar, stuff:things]")
|
79
|
+
assert t = lexer.pop(OboParser::Tokens::XrefList)
|
80
|
+
hsh = {'foo' => 'bar', 'stuff' => 'things'}
|
81
|
+
assert_equal hsh, t.value
|
82
|
+
end
|
83
|
+
|
79
84
|
def test_tagvaluepair
|
80
85
|
lexer = OboParser::Lexer.new("id: PATO:0000179")
|
81
86
|
assert lexer.pop(OboParser::Tokens::TagValuePair)
|
82
87
|
end
|
88
|
+
|
89
|
+
def test_tagvaluepair_with_comments_and_xrefs
|
90
|
+
lexer = OboParser::Lexer.new("def: \"The foo that is bar.\" [PATO:0000179] ! FOO! \n")
|
91
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
92
|
+
assert_equal 'def', t.tag
|
93
|
+
assert_equal 'The foo that is bar.', t.value
|
94
|
+
assert_equal 'FOO!', t.comment
|
95
|
+
assert_equal(['PATO:0000179'], t.xrefs)
|
96
|
+
end
|
97
|
+
|
83
98
|
end
|
84
99
|
|
85
100
|
class Test_Parser < Test::Unit::TestCase
|
@@ -89,10 +104,13 @@ class Test_Parser < Test::Unit::TestCase
|
|
89
104
|
|
90
105
|
def test_file_parsing
|
91
106
|
foo = parse_obo_file(@of)
|
92
|
-
assert_equal 'pato', foo.terms[0].name
|
93
|
-
assert_equal 'quality', foo.terms[1].name
|
94
|
-
assert_equal 'part_of', foo.typedefs.last.name
|
95
|
-
assert_equal 'OBO_REL:part_of', foo.typedefs.last.id
|
107
|
+
assert_equal 'pato', foo.terms[0].name.value
|
108
|
+
assert_equal 'quality', foo.terms[1].name.value
|
109
|
+
assert_equal 'part_of', foo.typedefs.last.name.value
|
110
|
+
assert_equal 'OBO_REL:part_of', foo.typedefs.last.id.value
|
111
|
+
assert_equal(['PATOC:GVG'], foo.terms[1].def.xrefs)
|
112
|
+
assert_equal 'is_obsolete', foo.terms.first.tags_named('is_obsolete').first.tag
|
113
|
+
assert_equal 'true', foo.terms.first.tags_named('is_obsolete').first.value
|
96
114
|
end
|
97
115
|
|
98
116
|
def teardown
|
@@ -106,3 +124,9 @@ class Test_Parser < Test::Unit::TestCase
|
|
106
124
|
|
107
125
|
end
|
108
126
|
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: obo_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
5
6
|
segments:
|
6
7
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 0.3.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- mjy
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2011-
|
18
|
+
date: 2011-04-05 00:00:00 -04:00
|
18
19
|
default_executable:
|
19
20
|
dependencies: []
|
20
21
|
|
@@ -26,13 +27,11 @@ extensions: []
|
|
26
27
|
|
27
28
|
extra_rdoc_files:
|
28
29
|
- LICENSE
|
29
|
-
- README
|
30
30
|
- README.rdoc
|
31
31
|
files:
|
32
32
|
- .document
|
33
33
|
- .gitignore
|
34
34
|
- LICENSE
|
35
|
-
- README
|
36
35
|
- README.rdoc
|
37
36
|
- Rakefile
|
38
37
|
- VERSION
|
@@ -60,23 +59,27 @@ rdoc_options:
|
|
60
59
|
require_paths:
|
61
60
|
- lib
|
62
61
|
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
63
|
requirements:
|
64
64
|
- - ">="
|
65
65
|
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
66
67
|
segments:
|
67
68
|
- 0
|
68
69
|
version: "0"
|
69
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
70
72
|
requirements:
|
71
73
|
- - ">="
|
72
74
|
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
73
76
|
segments:
|
74
77
|
- 0
|
75
78
|
version: "0"
|
76
79
|
requirements: []
|
77
80
|
|
78
81
|
rubyforge_project:
|
79
|
-
rubygems_version: 1.3
|
82
|
+
rubygems_version: 1.5.3
|
80
83
|
signing_key:
|
81
84
|
specification_version: 3
|
82
85
|
summary: A simple OBO file handler.
|