obo_parser 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/lexer.rb +4 -4
- data/lib/{obo_file.rb → obo_parser.rb} +10 -10
- data/lib/parser.rb +15 -15
- data/lib/tokens.rb +8 -8
- data/obo_parser.gemspec +2 -2
- data/test/test_obo_parser.rb +19 -19
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/lexer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class
|
1
|
+
class OboParser::Lexer
|
2
2
|
attr_reader :input
|
3
3
|
def initialize(input)
|
4
4
|
@input = input
|
@@ -17,7 +17,7 @@ class OboFile::Lexer
|
|
17
17
|
token = read_next_token(token_class)
|
18
18
|
@next_token = nil
|
19
19
|
if token.class != token_class
|
20
|
-
raise(
|
20
|
+
raise(OboParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
|
21
21
|
else
|
22
22
|
return token
|
23
23
|
end
|
@@ -34,13 +34,13 @@ class OboFile::Lexer
|
|
34
34
|
return @next_token
|
35
35
|
else
|
36
36
|
# now check all the tokens for a match
|
37
|
-
|
37
|
+
OboParser::Tokens.obo_file_token_list.each {|t|
|
38
38
|
return @next_token if match(t)
|
39
39
|
}
|
40
40
|
end
|
41
41
|
# no match, either end of string or lex-error
|
42
42
|
if @input != ''
|
43
|
-
raise(
|
43
|
+
raise(OboParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
|
44
44
|
else
|
45
45
|
return nil
|
46
46
|
end
|
data/lib/{obo_file.rb → obo_parser.rb}
@@ -4,14 +4,14 @@
|
|
4
4
|
|
5
5
|
# outstanding issues:
|
6
6
|
|
7
|
-
module
|
7
|
+
module OboParser
|
8
8
|
|
9
9
|
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
10
10
|
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
11
11
|
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
12
12
|
|
13
13
|
|
14
|
-
class
|
14
|
+
class OboParser # Node
|
15
15
|
attr_accessor :terms, :typedefs
|
16
16
|
|
17
17
|
def initialize
|
@@ -65,17 +65,17 @@ class OboFile # Node
|
|
65
65
|
end
|
66
66
|
|
67
67
|
|
68
|
-
class
|
68
|
+
class OboParserBuilder
|
69
69
|
def initialize
|
70
|
-
@of =
|
70
|
+
@of = OboParser.new
|
71
71
|
end
|
72
72
|
|
73
73
|
def add_term(tags)
|
74
|
-
@of.terms.push
|
74
|
+
@of.terms.push OboParser::Term.new(tags)
|
75
75
|
end
|
76
76
|
|
77
77
|
def add_typedef(tags)
|
78
|
-
@of.typedefs.push
|
78
|
+
@of.typedefs.push OboParser::Typedef.new(tags)
|
79
79
|
end
|
80
80
|
|
81
81
|
def obo_file
|
@@ -92,13 +92,13 @@ end # end module
|
|
92
92
|
# the actual method
|
93
93
|
def parse_obo_file(input)
|
94
94
|
@input = input
|
95
|
-
raise(
|
95
|
+
raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
|
96
96
|
|
97
97
|
@input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
98
98
|
|
99
|
-
builder =
|
100
|
-
lexer =
|
101
|
-
|
99
|
+
builder = OboParser::OboParserBuilder.new
|
100
|
+
lexer = OboParser::Lexer.new(@input)
|
101
|
+
OboParser::Parser.new(lexer, builder).parse_file
|
102
102
|
return builder.obo_file
|
103
103
|
end
|
104
104
|
|
data/lib/parser.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class
|
1
|
+
class OboParser::Parser
|
2
2
|
def initialize(lexer, builder)
|
3
3
|
@lexer = lexer
|
4
4
|
@builder = builder
|
@@ -6,45 +6,45 @@ class OboFile::Parser
|
|
6
6
|
|
7
7
|
def parse_file
|
8
8
|
# toss everything right now, we just want the terms
|
9
|
-
while !@lexer.peek(
|
10
|
-
@lexer.pop(
|
9
|
+
while !@lexer.peek(OboParser::Tokens::Term)
|
10
|
+
@lexer.pop(OboParser::Tokens::TagValuePair)
|
11
11
|
end
|
12
12
|
|
13
13
|
i = 0
|
14
|
-
while !@lexer.peek(
|
15
|
-
raise
|
14
|
+
while !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
15
|
+
raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000
|
16
16
|
parse_term
|
17
17
|
i += 1
|
18
18
|
end
|
19
19
|
|
20
20
|
i = 0
|
21
|
-
while @lexer.peek(
|
22
|
-
raise
|
21
|
+
while @lexer.peek(OboParser::Tokens::Typedef)
|
22
|
+
raise OboParser::ParseError,"infinite loop in Terms" if i > 1000000 # there aren't that many words!
|
23
23
|
parse_typedef
|
24
24
|
i += 1
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
def parse_term
|
29
|
-
t = @lexer.pop(
|
29
|
+
t = @lexer.pop(OboParser::Tokens::Term)
|
30
30
|
tags = []
|
31
|
-
while !@lexer.peek(
|
32
|
-
if @lexer.peek(
|
33
|
-
t = @lexer.pop(
|
31
|
+
while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
32
|
+
if @lexer.peek(OboParser::Tokens::TagValuePair)
|
33
|
+
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
34
34
|
tags.push [t.tag, t.value]
|
35
35
|
else
|
36
|
-
raise(
|
36
|
+
raise(OboParser::ParseError, "Expected a tag-value pair, but did not get one following this tag/value: [#{t.tag} / #{t.value}]")
|
37
37
|
end
|
38
38
|
end
|
39
39
|
@builder.add_term(tags)
|
40
40
|
end
|
41
41
|
|
42
42
|
def parse_typedef
|
43
|
-
@lexer.pop(
|
43
|
+
@lexer.pop(OboParser::Tokens::Typedef)
|
44
44
|
# @t = @builder.stub_typdef
|
45
45
|
tags = []
|
46
|
-
while !@lexer.peek(
|
47
|
-
t = @lexer.pop(
|
46
|
+
while !@lexer.peek(OboParser::Tokens::Typedef) && @lexer.peek(OboParser::Tokens::TagValuePair)
|
47
|
+
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
48
48
|
tags.push [t.tag, t.value]
|
49
49
|
end
|
50
50
|
@builder.add_typedef(tags)
|
data/lib/tokens.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module
|
1
|
+
module OboParser::Tokens
|
2
2
|
|
3
3
|
class Token
|
4
4
|
# this allows access the the class attribute regexp, without using a class variable
|
@@ -147,13 +147,13 @@ module OboFile::Tokens
|
|
147
147
|
# this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
|
148
148
|
def self.obo_file_token_list
|
149
149
|
[
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
150
|
+
OboParser::Tokens::Term,
|
151
|
+
OboParser::Tokens::Typedef,
|
152
|
+
OboParser::Tokens::TagValuePair,
|
153
|
+
OboParser::Tokens::NameValuePair, # not implemented
|
154
|
+
OboParser::Tokens::Dbxref, # not implemented
|
155
|
+
OboParser::Tokens::LBracket,
|
156
|
+
OboParser::Tokens::EndOfFile
|
157
157
|
]
|
158
158
|
end
|
159
159
|
|
data/obo_parser.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{obo_parser}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
@@ -28,7 +28,7 @@ Gem::Specification.new do |s|
|
|
28
28
|
"init.rb",
|
29
29
|
"install.rb",
|
30
30
|
"lib/lexer.rb",
|
31
|
-
"lib/
|
31
|
+
"lib/obo_parser.rb",
|
32
32
|
"lib/parser.rb",
|
33
33
|
"lib/tokens.rb",
|
34
34
|
"obo_parser.gemspec",
|
data/test/test_obo_parser.rb
CHANGED
@@ -2,7 +2,7 @@ require 'test/unit'
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'ruby-debug'
|
4
4
|
|
5
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/
|
5
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
|
6
6
|
|
7
7
|
class OboParserTest < Test::Unit::TestCase
|
8
8
|
def test_truth
|
@@ -10,9 +10,9 @@ class OboParserTest < Test::Unit::TestCase
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
-
class
|
13
|
+
class Test_OboParserBuilder < Test::Unit::TestCase
|
14
14
|
def test_builder
|
15
|
-
b =
|
15
|
+
b = OboParser::OboParserBuilder.new
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
@@ -30,16 +30,16 @@ end
|
|
30
30
|
class Test_Lexer < Test::Unit::TestCase
|
31
31
|
|
32
32
|
def test_term
|
33
|
-
lexer =
|
34
|
-
assert lexer.pop(
|
33
|
+
lexer = OboParser::Lexer.new("[Term]")
|
34
|
+
assert lexer.pop(OboParser::Tokens::Term)
|
35
35
|
end
|
36
36
|
|
37
37
|
def test_end_of_file
|
38
|
-
lexer =
|
39
|
-
assert lexer.pop(
|
38
|
+
lexer = OboParser::Lexer.new(" \n\n")
|
39
|
+
assert lexer.pop(OboParser::Tokens::EndOfFile)
|
40
40
|
|
41
|
-
lexer =
|
42
|
-
assert lexer.pop(
|
41
|
+
lexer = OboParser::Lexer.new("\n")
|
42
|
+
assert lexer.pop(OboParser::Tokens::EndOfFile)
|
43
43
|
end
|
44
44
|
|
45
45
|
def test_parse_term_stanza
|
@@ -49,37 +49,37 @@ class Test_Lexer < Test::Unit::TestCase
|
|
49
49
|
def: "A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra." [PATOC:cjm]
|
50
50
|
subset: attribute_slim
|
51
51
|
is_a: PATO:0001301'
|
52
|
-
lexer =
|
53
|
-
assert t = lexer.pop(
|
52
|
+
lexer = OboParser::Lexer.new(input)
|
53
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
54
54
|
assert_equal 'id', t.tag
|
55
55
|
assert_equal 'PATO:0000015', t.value
|
56
56
|
|
57
|
-
assert t = lexer.pop(
|
57
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
58
58
|
assert_equal 'name', t.tag
|
59
59
|
assert_equal 'color hue', t.value
|
60
60
|
|
61
|
-
assert t = lexer.pop(
|
61
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
62
62
|
assert_equal 'def', t.tag
|
63
63
|
assert_equal '"A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra." [PATOC:cjm]', t.value
|
64
64
|
|
65
|
-
assert t = lexer.pop(
|
65
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
66
66
|
assert_equal 'subset', t.tag
|
67
67
|
assert_equal 'attribute_slim', t.value
|
68
68
|
|
69
|
-
assert t = lexer.pop(
|
69
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
70
70
|
assert_equal 'is_a', t.tag
|
71
71
|
assert_equal 'PATO:0001301', t.value
|
72
72
|
end
|
73
73
|
|
74
74
|
|
75
75
|
def test_parse_term
|
76
|
-
lexer =
|
77
|
-
assert lexer.pop(
|
76
|
+
lexer = OboParser::Lexer.new("[Term]")
|
77
|
+
assert lexer.pop(OboParser::Tokens::Term)
|
78
78
|
end
|
79
79
|
|
80
80
|
def test_tagvaluepair
|
81
|
-
lexer =
|
82
|
-
assert lexer.pop(
|
81
|
+
lexer = OboParser::Lexer.new("id: PATO:0000179")
|
82
|
+
assert lexer.pop(OboParser::Tokens::TagValuePair)
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
7
|
+
- 2
|
8
8
|
- 0
|
9
|
-
version: 0.
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- mjy
|
@@ -39,7 +39,7 @@ files:
|
|
39
39
|
- init.rb
|
40
40
|
- install.rb
|
41
41
|
- lib/lexer.rb
|
42
|
-
- lib/
|
42
|
+
- lib/obo_parser.rb
|
43
43
|
- lib/parser.rb
|
44
44
|
- lib/tokens.rb
|
45
45
|
- obo_parser.gemspec
|