obo_parser 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/README.rdoc +37 -9
- data/VERSION +1 -1
- data/lib/lexer.rb +1 -0
- data/lib/obo_parser.rb +119 -73
- data/lib/parser.rb +13 -10
- data/lib/tokens.rb +94 -83
- data/lib/utilities.rb +8 -19
- data/obo_parser.gemspec +4 -7
- data/test/cell.obo +1 -0
- data/test/test_obo_parser.rb +36 -12
- metadata +11 -8
- data/README +0 -13
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,16 +1,44 @@
|
|
1
1
|
= obo_parser
|
2
2
|
|
3
|
-
A simple
|
3
|
+
A simple Ruby gem for parsing OBO formatted ontology files. Useful for reporting, comparing, and mapping data to other databases. There is presently no functionality for logical inference across the ontology.
|
4
4
|
|
5
|
-
==
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
gem install obo_parser
|
8
|
+
|
9
|
+
== Use
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'obo_parser'
|
13
|
+
foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance. Targets OBO v. 1.4
|
14
|
+
first_term = foo.terms.first # => An OboParser#Term instance
|
15
|
+
d = first_term.def # => An OboParser#Tag instance
|
6
16
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
17
|
+
d.tag # => 'def'
|
18
|
+
d.value # => 'Some definition'
|
19
|
+
d.xrefs # => ['xref:123', 'xref:456']
|
20
|
+
d.comment # => 'Some comment'
|
21
|
+
|
22
|
+
t = first_term.name # => An OboParser#Tag instance
|
23
|
+
t.tag # => 'name'
|
24
|
+
t.value # => 'Some Term name'
|
25
|
+
|
26
|
+
o = first_term.other_tags # => [OboParser#Tag, ... ] An array of tags that are not specially referenced in an OboParser::Stanza
|
27
|
+
o.first # => An OboParser#Tag instance
|
28
|
+
|
29
|
+
first_typedef = foo.typedefs.first # => An OboParser#Typedef instance
|
30
|
+
first_typedef.id.value # => 'Some typedef id'
|
31
|
+
first_typedef.name.value # => 'Some typedef name'
|
32
|
+
|
33
|
+
foo.terms.first.tags_named('is_a') # => [OboParser#Tag, ... ]
|
34
|
+
foo.terms.first.tags_named('is_a').first.tag # => 'is_a'
|
35
|
+
foo.terms.first.tags_named('is_a').first.value # => 'Some Term id'
|
36
|
+
|
37
|
+
See also /test/test_obo_parser.rb
|
38
|
+
|
39
|
+
== Utilities
|
40
|
+
|
41
|
+
A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in utilities.rb. See /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
|
14
42
|
|
15
43
|
== Copyright
|
16
44
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0
|
data/lib/lexer.rb
CHANGED
data/lib/obo_parser.rb
CHANGED
@@ -2,113 +2,159 @@
|
|
2
2
|
# uses the PhyloTree parser/lexer engine by Krishna Dole which in turn was based on
|
3
3
|
# Thomas Mailund's <mailund@birc.dk> 'newick-1.0.5' Python library
|
4
4
|
|
5
|
-
|
5
|
+
#== Outstanding issues:
|
6
|
+
# * Better documentation
|
7
|
+
# * More tests
|
6
8
|
|
7
9
|
module OboParser
|
8
10
|
|
9
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
10
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
11
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
12
|
-
require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
|
11
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
12
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
13
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
14
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'utilities'))
|
13
15
|
|
16
|
+
class OboParser
|
17
|
+
attr_accessor :terms, :typedefs
|
14
18
|
|
15
|
-
|
16
|
-
|
19
|
+
def initialize
|
20
|
+
@terms = []
|
21
|
+
@typedefs = []
|
22
|
+
true
|
23
|
+
end
|
17
24
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
true
|
22
|
-
end
|
25
|
+
def term_strings # :yields: Array of Strings
|
26
|
+
@terms.collect{|t| t.name.value}.sort
|
27
|
+
end
|
23
28
|
|
24
|
-
|
25
|
-
|
26
|
-
|
29
|
+
# Warning! This assumes terms are unique, they are NOT required to be so in an OBO file.
|
30
|
+
def term_hash # :yields: Hash (String => String) (name => id)
|
31
|
+
@terms.inject({}) {|sum, t| sum.update(t.name.value => t.id.value)}
|
32
|
+
end
|
27
33
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
34
|
+
def id_hash # :yields: Hash (String => String (id => name))
|
35
|
+
@terms.inject({}) {|sum, t| sum.update(t.id.value => t.name.value)}
|
36
|
+
end
|
32
37
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
38
|
+
class Stanza
|
39
|
+
# Make special reference to several specific types of tags (:name, :id), subclasses will remove additional special types from :other_tags
|
40
|
+
attr_accessor :name, :id, :other_tags
|
41
|
+
|
42
|
+
def initialize(tags)
|
43
|
+
@other_tags = []
|
44
|
+
|
45
|
+
while tags.length != 0
|
46
|
+
t = tags.shift
|
47
|
+
|
48
|
+
new_tag = OboParser::Tag.new
|
49
|
+
new_tag.tag = t.tag
|
50
|
+
new_tag.value = t.value
|
51
|
+
new_tag.comment = t.comment
|
52
|
+
new_tag.xrefs = t.xrefs
|
53
|
+
|
54
|
+
case new_tag.tag
|
55
|
+
when 'id'
|
56
|
+
@id = new_tag
|
57
|
+
when 'name'
|
58
|
+
@name = new_tag
|
59
|
+
else
|
60
|
+
@other_tags.push(new_tag)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
37
64
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
tags.each do |t|
|
46
|
-
case t[0]
|
47
|
-
when 'id'
|
48
|
-
@id = t[1]
|
49
|
-
when 'name'
|
50
|
-
@name = t[1]
|
51
|
-
else
|
52
|
-
@tags[t[0]] = [] if !@tags[t[0]]
|
53
|
-
@tags[t[0]].push t[1]
|
65
|
+
#=== Convenience methods
|
66
|
+
|
67
|
+
def tags_named(tag_name = nil)
|
68
|
+
return nil if tag_name.nil?
|
69
|
+
result = []
|
70
|
+
@other_tags.each do |t|
|
71
|
+
result.push t if t.tag == tag_name
|
54
72
|
end
|
73
|
+
result
|
55
74
|
end
|
75
|
+
|
56
76
|
end
|
57
|
-
end
|
58
77
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
78
|
+
class Term < Stanza
|
79
|
+
attr_accessor :def
|
80
|
+
def initialize(tags)
|
81
|
+
super
|
82
|
+
|
83
|
+
anonymous_tags = []
|
84
|
+
|
85
|
+
# Loop through "unclaimed" tags and reference those specific to Term
|
86
|
+
while @other_tags.size != 0
|
87
|
+
t = @other_tags.shift
|
88
|
+
case t.tag
|
89
|
+
when 'def'
|
90
|
+
@def = t
|
91
|
+
else
|
92
|
+
anonymous_tags.push(t)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
@other_tags = anonymous_tags
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
class Typedef < Stanza
|
100
|
+
def initialize(tags)
|
101
|
+
super
|
102
|
+
#anonymous_tags = []
|
103
|
+
## Loop through "unclaimed" tags and reference those specific to Typedef
|
104
|
+
#while @other_tags.size != 0
|
105
|
+
# t = @other_tags.shift
|
106
|
+
# case t.tag
|
107
|
+
# when 'def'
|
108
|
+
# @def = t
|
109
|
+
# else
|
110
|
+
# anonymous_tags.push(t)
|
111
|
+
# end
|
112
|
+
# @other_tags = anonymous_tags
|
113
|
+
#end
|
114
|
+
end
|
63
115
|
end
|
64
|
-
end
|
65
116
|
|
66
|
-
|
67
|
-
|
68
|
-
super
|
117
|
+
class Tag
|
118
|
+
attr_accessor :tag, :value, :xrefs, :comment
|
69
119
|
end
|
120
|
+
|
70
121
|
end
|
71
122
|
|
72
|
-
|
123
|
+
class OboParserBuilder
|
124
|
+
def initialize
|
125
|
+
@of = OboParser.new
|
126
|
+
end
|
73
127
|
|
128
|
+
def add_term(tags)
|
129
|
+
@of.terms.push OboParser::Term.new(tags)
|
130
|
+
end
|
74
131
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
end
|
132
|
+
def add_typedef(tags)
|
133
|
+
@of.typedefs.push OboParser::Typedef.new(tags)
|
134
|
+
end
|
79
135
|
|
80
|
-
|
81
|
-
|
82
|
-
|
136
|
+
def obo_file
|
137
|
+
@of
|
138
|
+
end
|
83
139
|
|
84
|
-
def add_typedef(tags)
|
85
|
-
@of.typedefs.push OboParser::Typedef.new(tags)
|
86
140
|
end
|
87
141
|
|
88
|
-
|
89
|
-
@of
|
142
|
+
class ParseError < StandardError
|
90
143
|
end
|
91
144
|
|
92
|
-
end
|
93
|
-
|
94
|
-
class ParseError < StandardError
|
95
|
-
end
|
96
|
-
|
97
145
|
end # end module
|
98
146
|
|
99
|
-
|
147
|
+
#= Implementation
|
148
|
+
|
100
149
|
def parse_obo_file(input)
|
101
150
|
@input = input
|
102
|
-
|
151
|
+
raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
|
152
|
+
|
153
|
+
# Comments are handled now.
|
154
|
+
# @input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
103
155
|
|
104
|
-
@input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
105
|
-
|
106
156
|
builder = OboParser::OboParserBuilder.new
|
107
157
|
lexer = OboParser::Lexer.new(@input)
|
108
158
|
OboParser::Parser.new(lexer, builder).parse_file
|
109
159
|
return builder.obo_file
|
110
160
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
data/lib/parser.rb
CHANGED
@@ -5,21 +5,21 @@ class OboParser::Parser
|
|
5
5
|
end
|
6
6
|
|
7
7
|
def parse_file
|
8
|
-
#
|
8
|
+
# At present we ignore the header lines
|
9
9
|
while !@lexer.peek(OboParser::Tokens::Term)
|
10
10
|
@lexer.pop(OboParser::Tokens::TagValuePair)
|
11
11
|
end
|
12
12
|
|
13
13
|
i = 0
|
14
14
|
while !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
15
|
-
raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000
|
15
|
+
raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000 # there aren't that many words!
|
16
16
|
parse_term
|
17
17
|
i += 1
|
18
18
|
end
|
19
19
|
|
20
20
|
i = 0
|
21
21
|
while @lexer.peek(OboParser::Tokens::Typedef)
|
22
|
-
raise OboParser::ParseError,"infinite loop in
|
22
|
+
raise OboParser::ParseError,"infinite loop in Typedefs" if i > 1000000
|
23
23
|
parse_typedef
|
24
24
|
i += 1
|
25
25
|
end
|
@@ -29,11 +29,11 @@ class OboParser::Parser
|
|
29
29
|
t = @lexer.pop(OboParser::Tokens::Term)
|
30
30
|
tags = []
|
31
31
|
while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
32
|
-
|
32
|
+
begin
|
33
33
|
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
34
|
-
tags.push
|
35
|
-
|
36
|
-
raise
|
34
|
+
tags.push(t)
|
35
|
+
rescue
|
36
|
+
raise
|
37
37
|
end
|
38
38
|
end
|
39
39
|
@builder.add_term(tags)
|
@@ -41,11 +41,14 @@ class OboParser::Parser
|
|
41
41
|
|
42
42
|
def parse_typedef
|
43
43
|
@lexer.pop(OboParser::Tokens::Typedef)
|
44
|
-
# @t = @builder.stub_typdef
|
45
44
|
tags = []
|
46
45
|
while !@lexer.peek(OboParser::Tokens::Typedef) && @lexer.peek(OboParser::Tokens::TagValuePair)
|
47
|
-
|
48
|
-
|
46
|
+
begin
|
47
|
+
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
48
|
+
tags.push(t)
|
49
|
+
rescue
|
50
|
+
raise
|
51
|
+
end
|
49
52
|
end
|
50
53
|
@builder.add_typedef(tags)
|
51
54
|
end
|
data/lib/tokens.rb
CHANGED
@@ -9,9 +9,6 @@ module OboParser::Tokens
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
# in ruby, \A is needed if you want to only match at the beginning of the string, we need this everywhere, as we're
|
13
|
-
# moving along popping off
|
14
|
-
|
15
12
|
class Term < Token
|
16
13
|
@regexp = Regexp.new(/\A\s*(\[term\])\s*/i)
|
17
14
|
end
|
@@ -20,19 +17,52 @@ module OboParser::Tokens
|
|
20
17
|
@regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
|
21
18
|
end
|
22
19
|
|
23
|
-
|
24
20
|
class TagValuePair < Token
|
25
|
-
attr_reader :tag, :
|
26
|
-
@regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
|
21
|
+
attr_reader :tag, :comment, :xrefs
|
22
|
+
@regexp = Regexp.new(/\A\s*([^:]+:.+)\s*\n*/i)
|
27
23
|
def initialize(str)
|
28
24
|
str.strip!
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
25
|
+
tag, value = str.split(':',2)
|
26
|
+
|
27
|
+
value.strip!
|
28
|
+
|
29
|
+
# Handle comments
|
30
|
+
if value =~ /(!\s*.+)\Z/i
|
31
|
+
@comment = $1
|
32
|
+
value.gsub!(@comment, '')
|
33
|
+
@comment.gsub!(/\A!\s*/, '')
|
34
|
+
end
|
35
|
+
|
36
|
+
# Break out the xrefs, could be made more robust
|
37
|
+
# Assumes non-quoted comma delimited in format 'foo:bar, stuff:things'
|
38
|
+
if value =~ /(\s*\[.*\]\s*)/i
|
39
|
+
xref_list = $1
|
40
|
+
value.gsub!(xref_list, '')
|
41
|
+
xref_list.strip!
|
42
|
+
xref_list = xref_list[1..-2] # strip []
|
43
|
+
@xrefs = xref_list.split(",")
|
44
|
+
end
|
33
45
|
|
34
|
-
@tag =
|
35
|
-
@value =
|
46
|
+
@tag = tag.strip
|
47
|
+
@value = value.strip
|
48
|
+
|
49
|
+
@value = @value[1..-2] if @value[0..0] == "\"" # get rid of quote marks
|
50
|
+
@value = @value[1..-2] if @value[0..0] == "'" # get rid of quote marks
|
51
|
+
|
52
|
+
@tag = @tag.strip
|
53
|
+
@value = @value.strip
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
class XrefList < Token
|
58
|
+
@regexp = Regexp.new(/\A\s*\[(.+)\]\s*\n*/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
|
59
|
+
def initialize(str)
|
60
|
+
str.strip!
|
61
|
+
@value = {}
|
62
|
+
str.split(",").each do |s|
|
63
|
+
i = s.split(":")
|
64
|
+
@value.merge!(i[0].strip => i[1].strip)
|
65
|
+
end
|
36
66
|
end
|
37
67
|
end
|
38
68
|
|
@@ -56,8 +86,6 @@ module OboParser::Tokens
|
|
56
86
|
end
|
57
87
|
end
|
58
88
|
|
59
|
-
|
60
|
-
|
61
89
|
# note we grab EOL and ; here
|
62
90
|
class ValuePair < Token
|
63
91
|
@regexp = Regexp.new(/\A\s*([\w\d\_\&]+\s*=\s*((\'[^\']+\')|(\(.*\))|(\"[^\"]+\")|([^\s\n\t;]+)))[\s\n\t;]+/i) # returns key => value hash for tokens like 'foo=bar' or foo = 'b a ar'
|
@@ -71,90 +99,73 @@ module OboParser::Tokens
|
|
71
99
|
end
|
72
100
|
end
|
73
101
|
|
74
|
-
class
|
75
|
-
@regexp = Regexp.new(
|
76
|
-
end
|
77
|
-
|
78
|
-
class RowVec < Token
|
79
|
-
@regexp = Regexp.new(/\A\s*(.+)\s*\n/i)
|
80
|
-
def initialize(str)
|
81
|
-
s = str.split(/\(|\)/).collect{|s| s=~ /[\,|\s]/ ? s.split(/[\,|\s]/) : s}.inject([]){|sum, x| x.class == Array ? sum << x.delete_if {|y| y == "" } : sum + x.strip.split(//)}
|
82
|
-
@value = s
|
83
|
-
end
|
102
|
+
class EndOfFile < Token
|
103
|
+
@regexp = Regexp.new('\A(\s*\n*)\Z')
|
84
104
|
end
|
85
105
|
|
86
|
-
|
87
|
-
|
88
106
|
## punctuation
|
89
107
|
|
90
108
|
class LBracket < Token
|
91
109
|
@regexp = Regexp.new('\A\s*(\[)\s*')
|
92
110
|
end
|
93
111
|
|
94
|
-
class
|
95
|
-
|
96
|
-
end
|
97
|
-
|
98
|
-
class
|
99
|
-
|
100
|
-
end
|
101
|
-
|
102
|
-
class RParen < Token
|
103
|
-
|
104
|
-
end
|
112
|
+
#class LParen < Token
|
113
|
+
# @regexp = Regexp.new('\A\s*(\()\s*')
|
114
|
+
#end
|
115
|
+
|
116
|
+
#class RBracket < Token
|
117
|
+
# @regexp = Regexp.new('\A\s*(\])\s*')
|
118
|
+
#end
|
119
|
+
|
120
|
+
#class RParen < Token
|
121
|
+
# @regexp = Regexp.new('\A\s*(\))\s*')
|
122
|
+
#end
|
123
|
+
|
124
|
+
#class Equals < Token
|
125
|
+
# @regexp = Regexp.new('\A\s*(=)\s*')
|
126
|
+
#end
|
127
|
+
|
128
|
+
#class BckSlash < Token
|
129
|
+
# @regexp = Regexp.new('\A\s*(\/)\s*')
|
130
|
+
#end
|
131
|
+
|
132
|
+
#class Colon < Token
|
133
|
+
# @regexp = Regexp.new('\A\s*(:)\s*')
|
134
|
+
#end
|
135
|
+
|
136
|
+
#class SemiColon < Token
|
137
|
+
# @regexp = Regexp.new('\A\s*(;)\s*')
|
138
|
+
#end
|
139
|
+
|
140
|
+
#class Comma < Token
|
141
|
+
# @regexp = Regexp.new('\A\s*(\,)\s*')
|
142
|
+
#end
|
105
143
|
|
106
|
-
class
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
end
|
118
|
-
|
119
|
-
|
120
|
-
@regexp = Regexp.new('\A\s*(;)\s*')
|
121
|
-
end
|
122
|
-
|
123
|
-
class Comma < Token
|
124
|
-
@regexp = Regexp.new('\A\s*(\,)\s*')
|
125
|
-
end
|
126
|
-
|
127
|
-
class EndOfFile < Token
|
128
|
-
@regexp = Regexp.new('\A(\s*\n*)\Z')
|
129
|
-
end
|
130
|
-
|
131
|
-
class Number < Token
|
132
|
-
@regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
|
133
|
-
def initialize(str)
|
134
|
-
# a little oddness here, in some case we don't want to include the .0
|
135
|
-
# see issues with numbers as labels
|
136
|
-
if str =~ /\./
|
137
|
-
@value = str.to_f
|
138
|
-
else
|
139
|
-
@value = str.to_i
|
140
|
-
end
|
141
|
-
|
142
|
-
end
|
143
|
-
end
|
144
|
-
|
145
|
-
# Tokens::NexusComment
|
146
|
-
|
147
|
-
# this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
|
144
|
+
#class Number < Token
|
145
|
+
# @regexp = Regexp.new('\A\s*(-?\d+(\.\d+)?([eE][+-]?\d+)?)\s*')
|
146
|
+
# def initialize(str)
|
147
|
+
# # a little oddness here, in some case we don't want to include the .0
|
148
|
+
# # see issues with numbers as labels
|
149
|
+
# if str =~ /\./
|
150
|
+
# @value = str.to_f
|
151
|
+
# else
|
152
|
+
# @value = str.to_i
|
153
|
+
# end
|
154
|
+
# end
|
155
|
+
#end
|
156
|
+
|
157
|
+
# This list defines inclusion and priority, i.e. if tokens have overlap then the earlier indexed token will match first
|
148
158
|
def self.obo_file_token_list
|
149
159
|
[
|
150
160
|
OboParser::Tokens::Term,
|
151
161
|
OboParser::Tokens::Typedef,
|
152
|
-
OboParser::Tokens::TagValuePair,
|
153
|
-
OboParser::Tokens::NameValuePair, # not implemented
|
154
|
-
OboParser::Tokens::Dbxref, # not implemented
|
155
162
|
OboParser::Tokens::LBracket,
|
163
|
+
OboParser::Tokens::TagValuePair,
|
164
|
+
OboParser::Tokens::XrefList,
|
156
165
|
OboParser::Tokens::EndOfFile
|
166
|
+
# OboParser::Tokens::NameValuePair, # not implemented
|
167
|
+
# OboParser::Tokens::Dbxref, # not implemented
|
157
168
|
]
|
158
169
|
end
|
159
|
-
|
170
|
+
|
160
171
|
end
|
data/lib/utilities.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'ruby-debug'
|
3
|
-
require 'obo_parser'
|
3
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'obo_parser'))
|
4
4
|
|
5
5
|
module OboParser::Utilities
|
6
6
|
|
@@ -11,7 +11,6 @@ module OboParser::Utilities
|
|
11
11
|
# of4 = File.read('hao4.obo')
|
12
12
|
#
|
13
13
|
# OboParser::Utilities::dump_comparison_by_id([of1, of2, of3, of4])
|
14
|
-
|
15
14
|
def self.dump_comparison_by_id(files = []) # :yields: String
|
16
15
|
of = []
|
17
16
|
files.each_with_index do |f, i|
|
@@ -39,10 +38,9 @@ module OboParser::Utilities
|
|
39
38
|
end
|
40
39
|
end
|
41
40
|
|
41
|
+
# infile is a tab delimited 2 column file that contains IDs in the form FOO_1234
|
42
|
+
# The file is replicated to STDOUT replacing the ID with the Term
|
42
43
|
def self.alignment_translate(infile = nil) # :yields: String
|
43
|
-
# infile is a tab delimited 2 column file that contains IDs in the form FOO_1234
|
44
|
-
# The file is replicated to STDOUT replacing the ID with the Term
|
45
|
-
|
46
44
|
agreement = ARGV[0]
|
47
45
|
raise "Provide a file with comparison." if agreement.nil?
|
48
46
|
comparison = File.read(agreement)
|
@@ -70,22 +68,15 @@ module OboParser::Utilities
|
|
70
68
|
"\t" +
|
71
69
|
(identifiers[v2].nil? ? 'NOT FOUND' : identifiers[v2])
|
72
70
|
end
|
73
|
-
|
74
71
|
end
|
75
72
|
|
76
|
-
|
77
|
-
def self.shared_labels(files = []) # :yields: String
|
78
|
-
|
79
73
|
# Returns labels found in all passed ontologies
|
80
|
-
|
81
74
|
# Usage:
|
82
|
-
|
83
|
-
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
|
87
|
-
# shared_labels([of1, of6])
|
88
|
-
|
75
|
+
# of1 = File.read('fly_anatomy.obo')
|
76
|
+
# of2 = File.read('hao.obo')
|
77
|
+
# of3 = File.read('mosquito_anatomy.obo')
|
78
|
+
# shared_labels([of1, of2, of3])
|
79
|
+
def self.shared_labels(files = []) # :yields: String
|
89
80
|
comparison = {}
|
90
81
|
|
91
82
|
files.each do |f|
|
@@ -109,10 +100,8 @@ module OboParser::Utilities
|
|
109
100
|
end
|
110
101
|
|
111
102
|
puts match.sort.join("\n")
|
112
|
-
|
113
103
|
puts "\n#{match.length} total."
|
114
104
|
|
115
105
|
end
|
116
|
-
|
117
106
|
|
118
107
|
end
|
data/obo_parser.gemspec
CHANGED
@@ -5,23 +5,21 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{obo_parser}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-04-05}
|
13
13
|
s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
|
14
14
|
s.email = %q{diapriid@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE",
|
17
|
-
"README",
|
18
17
|
"README.rdoc"
|
19
18
|
]
|
20
19
|
s.files = [
|
21
20
|
".document",
|
22
21
|
".gitignore",
|
23
22
|
"LICENSE",
|
24
|
-
"README",
|
25
23
|
"README.rdoc",
|
26
24
|
"Rakefile",
|
27
25
|
"VERSION",
|
@@ -43,17 +41,16 @@ Gem::Specification.new do |s|
|
|
43
41
|
s.homepage = %q{http://github.com/mjy/obo_parser}
|
44
42
|
s.rdoc_options = ["--charset=UTF-8"]
|
45
43
|
s.require_paths = ["lib"]
|
46
|
-
s.rubygems_version = %q{1.3
|
44
|
+
s.rubygems_version = %q{1.5.3}
|
47
45
|
s.summary = %q{A simple OBO file handler.}
|
48
46
|
s.test_files = [
|
49
47
|
"test/test_obo_parser.rb"
|
50
48
|
]
|
51
49
|
|
52
50
|
if s.respond_to? :specification_version then
|
53
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
54
51
|
s.specification_version = 3
|
55
52
|
|
56
|
-
if Gem::Version.new(Gem::
|
53
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
57
54
|
else
|
58
55
|
end
|
59
56
|
else
|
data/test/cell.obo
CHANGED
data/test/test_obo_parser.rb
CHANGED
@@ -2,7 +2,7 @@ require 'test/unit'
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'ruby-debug'
|
4
4
|
|
5
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
|
5
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
|
6
6
|
|
7
7
|
class OboParserTest < Test::Unit::TestCase
|
8
8
|
def test_truth
|
@@ -16,15 +16,12 @@ class Test_OboParserBuilder < Test::Unit::TestCase
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
|
20
19
|
class Test_Regex < Test::Unit::TestCase
|
21
20
|
|
22
|
-
def
|
23
|
-
|
24
|
-
txt = "line without note\nBegin taxa; ! comment\n! not this line\n'this ok!'\n\"this too!!\""
|
25
|
-
r2 = Regexp.new(/(\s*?![^!'"]*?\n)/i)
|
26
|
-
assert_equal "line without note\nBegin taxa;\n\n'this ok!'\n\"this too!!\"" , txt.gsub(r2, "\n")
|
21
|
+
def test_some_regex
|
22
|
+
assert true
|
27
23
|
end
|
24
|
+
|
28
25
|
end
|
29
26
|
|
30
27
|
class Test_Lexer < Test::Unit::TestCase
|
@@ -60,7 +57,8 @@ class Test_Lexer < Test::Unit::TestCase
|
|
60
57
|
|
61
58
|
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
62
59
|
assert_equal 'def', t.tag
|
63
|
-
assert_equal '
|
60
|
+
assert_equal 'A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra.', t.value
|
61
|
+
assert_equal(['PATOC:cjm'], t.xrefs)
|
64
62
|
|
65
63
|
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
66
64
|
assert_equal 'subset', t.tag
|
@@ -76,10 +74,27 @@ class Test_Lexer < Test::Unit::TestCase
|
|
76
74
|
assert lexer.pop(OboParser::Tokens::Term)
|
77
75
|
end
|
78
76
|
|
77
|
+
def test_xref_list
|
78
|
+
lexer = OboParser::Lexer.new("[foo:bar, stuff:things]")
|
79
|
+
assert t = lexer.pop(OboParser::Tokens::XrefList)
|
80
|
+
hsh = {'foo' => 'bar', 'stuff' => 'things'}
|
81
|
+
assert_equal hsh, t.value
|
82
|
+
end
|
83
|
+
|
79
84
|
def test_tagvaluepair
|
80
85
|
lexer = OboParser::Lexer.new("id: PATO:0000179")
|
81
86
|
assert lexer.pop(OboParser::Tokens::TagValuePair)
|
82
87
|
end
|
88
|
+
|
89
|
+
def test_tagvaluepair_with_comments_and_xrefs
|
90
|
+
lexer = OboParser::Lexer.new("def: \"The foo that is bar.\" [PATO:0000179] ! FOO! \n")
|
91
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
92
|
+
assert_equal 'def', t.tag
|
93
|
+
assert_equal 'The foo that is bar.', t.value
|
94
|
+
assert_equal 'FOO!', t.comment
|
95
|
+
assert_equal(['PATO:0000179'], t.xrefs)
|
96
|
+
end
|
97
|
+
|
83
98
|
end
|
84
99
|
|
85
100
|
class Test_Parser < Test::Unit::TestCase
|
@@ -89,10 +104,13 @@ class Test_Parser < Test::Unit::TestCase
|
|
89
104
|
|
90
105
|
def test_file_parsing
|
91
106
|
foo = parse_obo_file(@of)
|
92
|
-
assert_equal 'pato', foo.terms[0].name
|
93
|
-
assert_equal 'quality', foo.terms[1].name
|
94
|
-
assert_equal 'part_of', foo.typedefs.last.name
|
95
|
-
assert_equal 'OBO_REL:part_of', foo.typedefs.last.id
|
107
|
+
assert_equal 'pato', foo.terms[0].name.value
|
108
|
+
assert_equal 'quality', foo.terms[1].name.value
|
109
|
+
assert_equal 'part_of', foo.typedefs.last.name.value
|
110
|
+
assert_equal 'OBO_REL:part_of', foo.typedefs.last.id.value
|
111
|
+
assert_equal(['PATOC:GVG'], foo.terms[1].def.xrefs)
|
112
|
+
assert_equal 'is_obsolete', foo.terms.first.tags_named('is_obsolete').first.tag
|
113
|
+
assert_equal 'true', foo.terms.first.tags_named('is_obsolete').first.value
|
96
114
|
end
|
97
115
|
|
98
116
|
def teardown
|
@@ -106,3 +124,9 @@ class Test_Parser < Test::Unit::TestCase
|
|
106
124
|
|
107
125
|
end
|
108
126
|
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: obo_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
5
6
|
segments:
|
6
7
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
8
|
+
- 3
|
9
|
+
- 0
|
10
|
+
version: 0.3.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- mjy
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2011-
|
18
|
+
date: 2011-04-05 00:00:00 -04:00
|
18
19
|
default_executable:
|
19
20
|
dependencies: []
|
20
21
|
|
@@ -26,13 +27,11 @@ extensions: []
|
|
26
27
|
|
27
28
|
extra_rdoc_files:
|
28
29
|
- LICENSE
|
29
|
-
- README
|
30
30
|
- README.rdoc
|
31
31
|
files:
|
32
32
|
- .document
|
33
33
|
- .gitignore
|
34
34
|
- LICENSE
|
35
|
-
- README
|
36
35
|
- README.rdoc
|
37
36
|
- Rakefile
|
38
37
|
- VERSION
|
@@ -60,23 +59,27 @@ rdoc_options:
|
|
60
59
|
require_paths:
|
61
60
|
- lib
|
62
61
|
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
63
|
requirements:
|
64
64
|
- - ">="
|
65
65
|
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
66
67
|
segments:
|
67
68
|
- 0
|
68
69
|
version: "0"
|
69
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
70
72
|
requirements:
|
71
73
|
- - ">="
|
72
74
|
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
73
76
|
segments:
|
74
77
|
- 0
|
75
78
|
version: "0"
|
76
79
|
requirements: []
|
77
80
|
|
78
81
|
rubyforge_project:
|
79
|
-
rubygems_version: 1.3
|
82
|
+
rubygems_version: 1.5.3
|
80
83
|
signing_key:
|
81
84
|
specification_version: 3
|
82
85
|
summary: A simple OBO file handler.
|