obo_parser 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/lexer.rb +4 -4
- data/lib/{obo_file.rb → obo_parser.rb} +10 -10
- data/lib/parser.rb +15 -15
- data/lib/tokens.rb +8 -8
- data/obo_parser.gemspec +2 -2
- data/test/test_obo_parser.rb +19 -19
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/lexer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class
|
1
|
+
class OboParser::Lexer
|
2
2
|
attr_reader :input
|
3
3
|
def initialize(input)
|
4
4
|
@input = input
|
@@ -17,7 +17,7 @@ class OboFile::Lexer
|
|
17
17
|
token = read_next_token(token_class)
|
18
18
|
@next_token = nil
|
19
19
|
if token.class != token_class
|
20
|
-
raise(
|
20
|
+
raise(OboParser::ParseError,"expected #{token_class.to_s} but received #{token.class.to_s} at #{@input[0..10]}...", caller)
|
21
21
|
else
|
22
22
|
return token
|
23
23
|
end
|
@@ -34,13 +34,13 @@ class OboFile::Lexer
|
|
34
34
|
return @next_token
|
35
35
|
else
|
36
36
|
# now check all the tokens for a match
|
37
|
-
|
37
|
+
OboParser::Tokens.obo_file_token_list.each {|t|
|
38
38
|
return @next_token if match(t)
|
39
39
|
}
|
40
40
|
end
|
41
41
|
# no match, either end of string or lex-error
|
42
42
|
if @input != ''
|
43
|
-
raise(
|
43
|
+
raise(OboParser::ParseError, "Lex Error, unknown token at #{@input[0..10]}...", caller)
|
44
44
|
else
|
45
45
|
return nil
|
46
46
|
end
|
@@ -4,14 +4,14 @@
|
|
4
4
|
|
5
5
|
# outstanding issues:
|
6
6
|
|
7
|
-
module
|
7
|
+
module OboParser
|
8
8
|
|
9
9
|
require File.expand_path(File.join(File.dirname(__FILE__), 'tokens'))
|
10
10
|
require File.expand_path(File.join(File.dirname(__FILE__), 'parser'))
|
11
11
|
require File.expand_path(File.join(File.dirname(__FILE__), 'lexer'))
|
12
12
|
|
13
13
|
|
14
|
-
class
|
14
|
+
class OboParser # Node
|
15
15
|
attr_accessor :terms, :typedefs
|
16
16
|
|
17
17
|
def initialize
|
@@ -65,17 +65,17 @@ class OboFile # Node
|
|
65
65
|
end
|
66
66
|
|
67
67
|
|
68
|
-
class
|
68
|
+
class OboParserBuilder
|
69
69
|
def initialize
|
70
|
-
@of =
|
70
|
+
@of = OboParser.new
|
71
71
|
end
|
72
72
|
|
73
73
|
def add_term(tags)
|
74
|
-
@of.terms.push
|
74
|
+
@of.terms.push OboParser::Term.new(tags)
|
75
75
|
end
|
76
76
|
|
77
77
|
def add_typedef(tags)
|
78
|
-
@of.typedefs.push
|
78
|
+
@of.typedefs.push OboParser::Typedef.new(tags)
|
79
79
|
end
|
80
80
|
|
81
81
|
def obo_file
|
@@ -92,13 +92,13 @@ end # end module
|
|
92
92
|
# the actual method
|
93
93
|
def parse_obo_file(input)
|
94
94
|
@input = input
|
95
|
-
raise(
|
95
|
+
raise(OboParser::ParseError, "Nothing passed to parse!") if !@input || @input.size == 0
|
96
96
|
|
97
97
|
@input.gsub!(/(\s*?![^!'"]*?\n)/i, "\n") # strip out comments - this is a kludge, likely needs fixing!!
|
98
98
|
|
99
|
-
builder =
|
100
|
-
lexer =
|
101
|
-
|
99
|
+
builder = OboParser::OboParserBuilder.new
|
100
|
+
lexer = OboParser::Lexer.new(@input)
|
101
|
+
OboParser::Parser.new(lexer, builder).parse_file
|
102
102
|
return builder.obo_file
|
103
103
|
end
|
104
104
|
|
data/lib/parser.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class
|
1
|
+
class OboParser::Parser
|
2
2
|
def initialize(lexer, builder)
|
3
3
|
@lexer = lexer
|
4
4
|
@builder = builder
|
@@ -6,45 +6,45 @@ class OboFile::Parser
|
|
6
6
|
|
7
7
|
def parse_file
|
8
8
|
# toss everything right now, we just want the terms
|
9
|
-
while !@lexer.peek(
|
10
|
-
@lexer.pop(
|
9
|
+
while !@lexer.peek(OboParser::Tokens::Term)
|
10
|
+
@lexer.pop(OboParser::Tokens::TagValuePair)
|
11
11
|
end
|
12
12
|
|
13
13
|
i = 0
|
14
|
-
while !@lexer.peek(
|
15
|
-
raise
|
14
|
+
while !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
15
|
+
raise OboParser::ParseError, "infinite loop in Terms" if i > 10000000
|
16
16
|
parse_term
|
17
17
|
i += 1
|
18
18
|
end
|
19
19
|
|
20
20
|
i = 0
|
21
|
-
while @lexer.peek(
|
22
|
-
raise
|
21
|
+
while @lexer.peek(OboParser::Tokens::Typedef)
|
22
|
+
raise OboParser::ParseError,"infinite loop in Terms" if i > 1000000 # there aren't that many words!
|
23
23
|
parse_typedef
|
24
24
|
i += 1
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
def parse_term
|
29
|
-
t = @lexer.pop(
|
29
|
+
t = @lexer.pop(OboParser::Tokens::Term)
|
30
30
|
tags = []
|
31
|
-
while !@lexer.peek(
|
32
|
-
if @lexer.peek(
|
33
|
-
t = @lexer.pop(
|
31
|
+
while !@lexer.peek(OboParser::Tokens::Term) && !@lexer.peek(OboParser::Tokens::Typedef) && !@lexer.peek(OboParser::Tokens::EndOfFile)
|
32
|
+
if @lexer.peek(OboParser::Tokens::TagValuePair)
|
33
|
+
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
34
34
|
tags.push [t.tag, t.value]
|
35
35
|
else
|
36
|
-
raise(
|
36
|
+
raise(OboParser::ParseError, "Expected a tag-value pair, but did not get one following this tag/value: [#{t.tag} / #{t.value}]")
|
37
37
|
end
|
38
38
|
end
|
39
39
|
@builder.add_term(tags)
|
40
40
|
end
|
41
41
|
|
42
42
|
def parse_typedef
|
43
|
-
@lexer.pop(
|
43
|
+
@lexer.pop(OboParser::Tokens::Typedef)
|
44
44
|
# @t = @builder.stub_typdef
|
45
45
|
tags = []
|
46
|
-
while !@lexer.peek(
|
47
|
-
t = @lexer.pop(
|
46
|
+
while !@lexer.peek(OboParser::Tokens::Typedef) && @lexer.peek(OboParser::Tokens::TagValuePair)
|
47
|
+
t = @lexer.pop(OboParser::Tokens::TagValuePair)
|
48
48
|
tags.push [t.tag, t.value]
|
49
49
|
end
|
50
50
|
@builder.add_typedef(tags)
|
data/lib/tokens.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module
|
1
|
+
module OboParser::Tokens
|
2
2
|
|
3
3
|
class Token
|
4
4
|
# this allows access to the class attribute regexp, without using a class variable
|
@@ -147,13 +147,13 @@ module OboFile::Tokens
|
|
147
147
|
# this list also defines priority, i.e. if tokens have overlap (which they shouldn't!!) then the earlier indexed token will match first
|
148
148
|
def self.obo_file_token_list
|
149
149
|
[
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
150
|
+
OboParser::Tokens::Term,
|
151
|
+
OboParser::Tokens::Typedef,
|
152
|
+
OboParser::Tokens::TagValuePair,
|
153
|
+
OboParser::Tokens::NameValuePair, # not implemented
|
154
|
+
OboParser::Tokens::Dbxref, # not implemented
|
155
|
+
OboParser::Tokens::LBracket,
|
156
|
+
OboParser::Tokens::EndOfFile
|
157
157
|
]
|
158
158
|
end
|
159
159
|
|
data/obo_parser.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{obo_parser}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
@@ -28,7 +28,7 @@ Gem::Specification.new do |s|
|
|
28
28
|
"init.rb",
|
29
29
|
"install.rb",
|
30
30
|
"lib/lexer.rb",
|
31
|
-
"lib/
|
31
|
+
"lib/obo_parser.rb",
|
32
32
|
"lib/parser.rb",
|
33
33
|
"lib/tokens.rb",
|
34
34
|
"obo_parser.gemspec",
|
data/test/test_obo_parser.rb
CHANGED
@@ -2,7 +2,7 @@ require 'test/unit'
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'ruby-debug'
|
4
4
|
|
5
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/
|
5
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/obo_parser'))
|
6
6
|
|
7
7
|
class OboParserTest < Test::Unit::TestCase
|
8
8
|
def test_truth
|
@@ -10,9 +10,9 @@ class OboParserTest < Test::Unit::TestCase
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
-
class
|
13
|
+
class Test_OboParserBuilder < Test::Unit::TestCase
|
14
14
|
def test_builder
|
15
|
-
b =
|
15
|
+
b = OboParser::OboParserBuilder.new
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
@@ -30,16 +30,16 @@ end
|
|
30
30
|
class Test_Lexer < Test::Unit::TestCase
|
31
31
|
|
32
32
|
def test_term
|
33
|
-
lexer =
|
34
|
-
assert lexer.pop(
|
33
|
+
lexer = OboParser::Lexer.new("[Term]")
|
34
|
+
assert lexer.pop(OboParser::Tokens::Term)
|
35
35
|
end
|
36
36
|
|
37
37
|
def test_end_of_file
|
38
|
-
lexer =
|
39
|
-
assert lexer.pop(
|
38
|
+
lexer = OboParser::Lexer.new(" \n\n")
|
39
|
+
assert lexer.pop(OboParser::Tokens::EndOfFile)
|
40
40
|
|
41
|
-
lexer =
|
42
|
-
assert lexer.pop(
|
41
|
+
lexer = OboParser::Lexer.new("\n")
|
42
|
+
assert lexer.pop(OboParser::Tokens::EndOfFile)
|
43
43
|
end
|
44
44
|
|
45
45
|
def test_parse_term_stanza
|
@@ -49,37 +49,37 @@ class Test_Lexer < Test::Unit::TestCase
|
|
49
49
|
def: "A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra." [PATOC:cjm]
|
50
50
|
subset: attribute_slim
|
51
51
|
is_a: PATO:0001301'
|
52
|
-
lexer =
|
53
|
-
assert t = lexer.pop(
|
52
|
+
lexer = OboParser::Lexer.new(input)
|
53
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
54
54
|
assert_equal 'id', t.tag
|
55
55
|
assert_equal 'PATO:0000015', t.value
|
56
56
|
|
57
|
-
assert t = lexer.pop(
|
57
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
58
58
|
assert_equal 'name', t.tag
|
59
59
|
assert_equal 'color hue', t.value
|
60
60
|
|
61
|
-
assert t = lexer.pop(
|
61
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
62
62
|
assert_equal 'def', t.tag
|
63
63
|
assert_equal '"A chromatic scalar-circular quality inhering in an object that manifests in an observer by virtue of the dominant wavelength of the visible light; may be subject to fiat divisions, typically into 7 or 8 spectra." [PATOC:cjm]', t.value
|
64
64
|
|
65
|
-
assert t = lexer.pop(
|
65
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
66
66
|
assert_equal 'subset', t.tag
|
67
67
|
assert_equal 'attribute_slim', t.value
|
68
68
|
|
69
|
-
assert t = lexer.pop(
|
69
|
+
assert t = lexer.pop(OboParser::Tokens::TagValuePair)
|
70
70
|
assert_equal 'is_a', t.tag
|
71
71
|
assert_equal 'PATO:0001301', t.value
|
72
72
|
end
|
73
73
|
|
74
74
|
|
75
75
|
def test_parse_term
|
76
|
-
lexer =
|
77
|
-
assert lexer.pop(
|
76
|
+
lexer = OboParser::Lexer.new("[Term]")
|
77
|
+
assert lexer.pop(OboParser::Tokens::Term)
|
78
78
|
end
|
79
79
|
|
80
80
|
def test_tagvaluepair
|
81
|
-
lexer =
|
82
|
-
assert lexer.pop(
|
81
|
+
lexer = OboParser::Lexer.new("id: PATO:0000179")
|
82
|
+
assert lexer.pop(OboParser::Tokens::TagValuePair)
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
7
|
+
- 2
|
8
8
|
- 0
|
9
|
-
version: 0.
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- mjy
|
@@ -39,7 +39,7 @@ files:
|
|
39
39
|
- init.rb
|
40
40
|
- install.rb
|
41
41
|
- lib/lexer.rb
|
42
|
-
- lib/
|
42
|
+
- lib/obo_parser.rb
|
43
43
|
- lib/parser.rb
|
44
44
|
- lib/tokens.rb
|
45
45
|
- obo_parser.gemspec
|