fabulator-grammar 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +22 -0
- data/Rakefile +3 -1
- data/VERSION +1 -1
- data/features/grammar.feature +116 -12
- data/features/step_definitions/expression_steps.rb +2 -2
- data/features/step_definitions/grammar_steps.rb +46 -2
- data/features/step_definitions/xml_steps.rb +5 -16
- data/features/support/env.rb +1 -0
- data/lib/fabulator-grammar.rb +1 -0
- data/lib/fabulator/grammar.rb +12 -3
- data/lib/fabulator/grammar/actions.rb +17 -7
- data/lib/fabulator/grammar/actions/context.rb +18 -0
- data/lib/fabulator/grammar/actions/grammar.rb +76 -0
- data/lib/fabulator/grammar/actions/rule.rb +51 -0
- data/lib/fabulator/grammar/actions/token.rb +27 -0
- data/lib/fabulator/grammar/actions/when.rb +35 -0
- data/lib/fabulator/grammar/cursor.rb +118 -0
- data/lib/fabulator/grammar/expr/anchor.rb +28 -0
- data/lib/fabulator/grammar/expr/char_set.rb +67 -18
- data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
- data/lib/fabulator/grammar/expr/rule.rb +33 -28
- data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
- data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
- data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
- data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
- data/lib/fabulator/grammar/expr/sequence.rb +7 -1
- data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
- data/lib/fabulator/grammar/expr/text.rb +8 -0
- data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
- data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
- data/lib/fabulator/grammar/rule_parser.rb +667 -0
- data/lib/fabulator/grammar/token_parser.rb +733 -0
- data/rules.racc +249 -0
- data/tokens.racc +257 -0
- metadata +29 -12
- data/lib/fabulator/grammar/parser.rb +0 -548
- data/regex.racc +0 -183
@@ -0,0 +1,51 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Actions
      # Structural element describing one named grammar rule. A rule is a
      # list of `when` choices; parsing picks the matching choice with the
      # highest score.
      class Rule < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :name, :static => true
        attribute :mode, :default => "default", :static => true

        # NOTE(review): presumably the framework fills @choices from the
        # contained `when` elements -- confirm against Fabulator::Structural.
        contains :when, :as => :choices

        has_actions

        # g:: the grammar this rule belongs to (optional).
        def initialize(g = nil)
          @grammar = g
          @choices = [ ]
        end

        # Tries every choice against +cursor+ and keeps the best-scoring one.
        #
        # Every choice is attempted from the same starting point. When
        # scores tie, the earliest choice wins. On success the cursor is
        # left where the winning choice finished, the winner's actions (if
        # any) are run against a fresh context holding the matched data,
        # and the matched data is returned. When nothing matches the cursor
        # is rewound to where it started and nil is returned.
        def parse(cursor)
          start = cursor.point
          best_attempt = nil

          @choices.each do |choice|
            # Rewind first: a previously successful choice must not shift
            # the position from which the next choice is tried.
            cursor.point = start
            ret = choice.parse(cursor)
            next if ret.nil?

            score = choice.score(cursor.context, ret)
            if best_attempt.nil? || best_attempt[:score] < score
              best_attempt = {
                :score  => score,
                :choice => choice,
                :ret    => ret,
                :point  => cursor.point
              }
            end
          end

          if best_attempt.nil?
            cursor.point = start
            return nil
          end

          # Leave the cursor where the winning choice ended, not where the
          # last attempted choice happened to stop.
          cursor.point = best_attempt[:point]

          choice = best_attempt[:choice]
          ret = best_attempt[:ret]
          if choice.has_actions?
            ctx = cursor.context.with_root(cursor.context.root.anon_node(nil))
            ctx.merge_data(ret)
            choice.run(ctx)
          end
          ret
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Actions
      # Structural element for a named token. Its +matches+ attribute holds
      # the token expression, which is compiled once by TokenParser.
      class Token < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :name, :static => true
        attribute :mode, :default => "default", :static => true
        attribute :matches, :static => true

        # Runs the inherited XML compilation, then compiles the +matches+
        # expression into a parse tree kept in @c_matches.
        def compile_xml(xml, ctx = nil)
          super

          @c_matches = Fabulator::Grammar::TokenParser.new.parse(self.matches)
        end

        # Delegates matching at +cursor+ to the compiled expression.
        def parse(cursor)
          @c_matches.parse(cursor)
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Actions
      # One alternative ("when") of a grammar rule. Its +matches+ attribute
      # holds the rule expression, compiled once by RuleParser; the optional
      # +score+ attribute ranks this alternative against its siblings.
      class When < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :matches, :static => true
        attribute :score, :eval => true

        has_actions

        # Runs the inherited XML compilation, then compiles the +matches+
        # expression into a parse tree kept in @c_matches.
        def compile_xml(xml, ctx = nil)
          super

          @c_matches = Fabulator::Grammar::RuleParser.new.parse(self.matches)
        end

        # Delegates matching at +cursor+ to the compiled expression.
        def parse(cursor)
          @c_matches.parse(cursor)
        end

        # Returns the score of this alternative for the matched +data+.
        # Yields 0 when no score expression is set or when evaluating it
        # raises.
        #
        # NOTE(review): this two-argument method may replace the
        # one-argument +score+ generated by `attribute :score`; if so the
        # call below raises ArgumentError and the result is always 0 --
        # confirm against Fabulator::Structural.
        def score(context, data)
          if @score.nil?
            0
          else
            scoring_ctx = context.with_root(context.root.anon_node(nil))
            scoring_ctx.merge_data(data)
            begin
              self.score(scoring_ctx).value
            rescue StandardError
              0
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Fabulator
  module Grammar
    # Tracks the parse position inside a source string together with the
    # state that backtracking has to save and restore: mode, skip
    # expression, anchoring flag and the root of the working context.
    class Cursor
      attr_accessor :mode, :skip, :anchored
      attr_reader :context, :grammar

      # g::   grammar used by #find_rule
      # ctx:: context; the cursor works on a copy rooted at a fresh
      #       anonymous node
      # s::   source string to parse
      def initialize(g,ctx,s)
        @source = s
        @grammar = g
        @curpos = 0
        @end = @source.length-1
        @line = 0
        @col = 0
        @anchored = false
        @mode = :default
        @skip = nil
        @context = ctx.with_root(ctx.root.anon_node(nil))
      end

      # Current offset into the source.
      def pos
        @curpos
      end

      # The working context (same object as #context).
      def data
        @context
      end

      # Moves forward one character at a time until +pat+ matches at the
      # current position or the end of the source is reached.
      def resync(pat)
        # \A pins the match to the current position; ^ would also match
        # at the start of any later line. The group keeps an alternation
        # in +pat+ from escaping the anchor.
        re = %r{\A(?:#{pat})}
        until eof || (@source[@curpos..-1] || '') =~ re
          @curpos += 1
        end
      end

      # True once the position is past the last character.
      def eof
        @curpos > @end
      end

      # Moves forward by +i+ characters; non-positive values are ignored.
      def advance_position(i)
        @curpos += i if i > 0
      end

      # Snapshot of everything #attempt must restore on failure.
      def point
        {
          :curpos   => @curpos,
          :line     => @line,
          :col      => @col,
          :root     => @context.root,
          :mode     => @mode,
          :anchored => @anchored,
          :skip     => @skip
        }
      end

      # Restores a snapshot taken with #point.
      def point=(p)
        @curpos = p[:curpos]
        @line = p[:line]
        @col = p[:col]
        @mode = p[:mode]
        @anchored = p[:anchored]
        @skip = p[:skip]
        @context.root = p[:root]
      end

      # Yields the cursor to the block. When the block returns nil the
      # cursor is rewound to its state before the call and nil is
      # returned; otherwise the block's value is returned and the new
      # state is kept.
      def attempt
        saved = point
        ret = yield self
        if ret.nil?
          self.point = saved
          return nil
        end

        ret
      end

      # Looks +nom+ up in the current mode, falling back to the 'default'
      # mode when the current mode has no such rule.
      def find_rule(nom)
        r = @grammar.get_rule(@mode, nom)
        if r.nil? && @mode.to_s != 'default'
          r = @grammar.get_rule('default', nom)
        end
        r
      end

      # Consumes every consecutive match of the skip expression at the
      # current position. Only the position changes: skip, mode and the
      # anchoring flag are the same after the call as before it.
      def do_skip
        return if @skip.nil?

        my_skip = @skip
        new_pos = @curpos
        attempt do |cursor|
          # The skip expression must not skip recursively, and it has to
          # match right here rather than search ahead.
          cursor.skip = nil
          cursor.anchored = true
          loop do
            before = cursor.pos
            matched = cursor.attempt { |c| my_skip.parse(c) }
            # Stop on failure, and on an empty match that would otherwise
            # repeat forever.
            break if matched.nil? || cursor.pos <= before
            new_pos = cursor.pos
          end
          nil # make #attempt restore skip/anchored/mode
        end
        @curpos = new_pos
      end

      # Applies the skip expression, then tries to match +regex+ (a
      # pattern string or Regexp) at the current position. Returns the
      # matched text and advances past it, or returns nil and leaves the
      # position where skipping left it.
      def match_token(regex)
        do_skip
        remaining = @source[@curpos .. @end] || ''
        # \A, not ^: the token has to start exactly at the current
        # position, never at the beginning of a later line.
        m = %r{\A(#{regex})}.match(remaining)
        return nil if m.nil?

        res = m[1].to_s
        @curpos += res.length
        res
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Expr
      # Zero-width assertion about the cursor position.
      class Anchor
        # t:: one of :start_of_string, :start_of_line, :end_of_string,
        #     :end_of_line
        def initialize(t)
          @anchor = t
        end

        # Anchors never contribute a named value to the parse result.
        def name
          nil
        end

        # Returns an empty hash when the assertion holds at +source+ (a
        # cursor responding to #pos and #eof) and nil when it does not.
        # Nothing is consumed.
        def parse(source)
          case @anchor
          when :start_of_string
            source.pos == 0 ? {} : nil
          when :end_of_string
            source.eof ? {} : nil
          when :start_of_line, :end_of_line
            # Line anchors are not implemented; they never match.
            nil
          end
        end
      end
    end
  end
end
|
@@ -1,45 +1,94 @@
|
|
1
|
+
require 'bitset'
|
2
|
+
|
1
3
|
module Fabulator::Grammar::Expr
  # A set of 8-bit character codes backed by a BitSet.
  class CharSet
    # cs:: set description in character-class notation without the
    #      brackets, e.g. "a-z0-9_". A leading '-' is a literal dash.
    #
    # Raises ArgumentError when a range is missing one of its end points
    # (for example "a--z").
    def initialize(cs = "")
      @set = BitSet.new
      if cs[0..0] == '-'
        @set.on(0x2d) # literal '-'
        cs = cs[1..-1]
      end

      # Each '-' separates two segments: the last byte of the left segment
      # and the first byte of the right one delimit a range, every other
      # byte is a literal member. Working on byte values keeps the result
      # identical on Ruby 1.8 and later versions.
      segments = cs.split(/-/).map { |seg| seg.unpack('C*') }
      if segments.any? { |seg| seg.empty? }
        raise ArgumentError, "malformed character range in #{cs.inspect}"
      end

      last = segments.size - 1
      segments.each_with_index do |seg, i|
        from = (i == 0) ? 0 : 1      # first byte closes the previous range
        to = (i == last) ? -1 : -2   # last byte opens the next range
        (seg[from..to] || []).each { |code| @set.on(code) }
        @set.on(seg.last .. segments[i + 1].first) if i < last
      end
    end

    # The underlying BitSet.
    def set
      @set
    end

    # Adds every member of +c+ to this set; returns self.
    def or(c)
      @set = @set | c.set
      self
    end

    # Removes every member of +c+ from this set; returns self.
    def but_not(c)
      @set = @set - c.set
      self
    end

    # for now, we restrict ourselves to 8-bit characters
    # NOTE(review): unlike #or and #but_not this returns whatever
    # BitSet#on returns, not self -- confirm what callers expect.
    def universal
      @set.on(0..0xff)
    end

    # Builds a character-class Regexp from the set. An empty set yields a
    # regex matching any single character.
    def to_regex
      # want a compact set of ranges for the regex
      set_def = ''
      @set.to_ary.each do |r|
        if r.is_a?(Range)
          set_def << Regexp.quote(r.begin.to_i.chr) << '-' << Regexp.quote(r.end.to_i.chr)
        else
          set_def << Regexp.quote(r.to_i.chr)
        end
      end
      if set_def == ''
        %r{.}
      else
        %r{[#{set_def}]}
      end
    end
  end

  # A CharSet initialised from a named class such as "alpha" or "digit".
  class CharClass < CharSet
    @@charsets = {
      'alnum' => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a ],
      'alpha' => [ 0x41 .. 0x5a, 0x61 .. 0x7a ],
      'ascii' => [ 0x00 .. 0x7f ],
      'blank' => [ 0x09, 0x20 ], # \t + space
      'cntrl' => [ 0x00 .. 0x1f, 0x7f ],
      'digit' => [ 0x30 .. 0x39 ],
      'graph' => [ 0x21 .. 0x7e ],
      'lower' => [ 0x61 .. 0x7a ],
      'print' => [ 0x20 .. 0x7e ],
      'space' => [ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x20 ], # \t\n\v\f\r + space
      'upper' => [ 0x41 .. 0x5a ],
      'word'  => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a, 0x5f ], # 0x5f is '_'
      'xdigit'=> [ 0x30 .. 0x39, 0x41 .. 0x46, 0x61 .. 0x66 ],
      # NOTE(review): 0x0c is form feed; if carriage return was meant
      # this should be 0x0d -- left as found.
      'nl'    => [ 0x0a, 0x0c ]
    }

    # cs:: class name, case-insensitive; unknown names give an empty set.
    def initialize(cs)
      @set = BitSet.new.on(@@charsets[cs.downcase] || [])
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Expr
      # Positive look-ahead: succeeds when the wrapped sequence matches at
      # the cursor.
      class LookAhead
        def initialize(sequence)
          @sequence = sequence
        end

        # Look-aheads never contribute a named value.
        def name
          nil
        end

        # Returns {} when the sequence matches at +source+, nil otherwise.
        def parse(source)
          outcome = nil
          source.attempt do |probe|
            outcome = @sequence.parse(probe)
            nil # a nil block result is what makes #attempt rewind
          end
          {} unless outcome.nil?
        end
      end

      # Negative look-ahead: succeeds when the wrapped sequence does NOT
      # match at the cursor.
      class NegLookAhead
        def initialize(sequence)
          @sequence = sequence
        end

        # Look-aheads never contribute a named value.
        def name
          nil
        end

        # Returns {} when the sequence fails at +source+, nil when it
        # matches.
        def parse(source)
          outcome = nil
          source.attempt do |probe|
            outcome = @sequence.parse(probe)
            nil # a nil block result is what makes #attempt rewind
          end
          {} if outcome.nil?
        end
      end

    end
  end
end
|
@@ -1,35 +1,40 @@
|
|
1
|
-
module Fabulator
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
@anchor_end = false
|
7
|
-
end
|
8
|
-
|
9
|
-
def anchor_start
|
10
|
-
@anchor_start = true
|
11
|
-
end
|
1
|
+
module Fabulator
  module Grammar
    module Expr
      # An ordered list of alternatives; the first alternative that
      # matches wins.
      class Rule
        attr_accessor :name

        def initialize
          @alternatives = [ ]
        end

        # Appends +a+ to the list of alternatives.
        def add_alternative(a)
          @alternatives << a
        end

        # Matches the rule against cursor +s+.
        #
        # With an anchored cursor the alternatives are tried at the
        # current position only. Otherwise they are tried at each
        # successive position until one matches or the end of the source
        # is reached. Returns the first non-nil result, or nil.
        def parse(s)
          return first_match(s) if s.anchored

          until s.eof
            found = first_match(s)
            return found unless found.nil?
            s.advance_position(1)
          end
          nil
        end

        private

        # Tries each alternative, in order, at the cursor's current
        # position with anchoring switched on; returns the first non-nil
        # result or nil.
        def first_match(s)
          @alternatives.each do |candidate|
            found = s.attempt do |cursor|
              cursor.anchored = true
              candidate.parse(cursor)
            end
            return found unless found.nil?
          end
          nil
        end
      end
    end
  end
end
|