fabulator-grammar 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +22 -0
- data/Rakefile +3 -1
- data/VERSION +1 -1
- data/features/grammar.feature +116 -12
- data/features/step_definitions/expression_steps.rb +2 -2
- data/features/step_definitions/grammar_steps.rb +46 -2
- data/features/step_definitions/xml_steps.rb +5 -16
- data/features/support/env.rb +1 -0
- data/lib/fabulator-grammar.rb +1 -0
- data/lib/fabulator/grammar.rb +12 -3
- data/lib/fabulator/grammar/actions.rb +17 -7
- data/lib/fabulator/grammar/actions/context.rb +18 -0
- data/lib/fabulator/grammar/actions/grammar.rb +76 -0
- data/lib/fabulator/grammar/actions/rule.rb +51 -0
- data/lib/fabulator/grammar/actions/token.rb +27 -0
- data/lib/fabulator/grammar/actions/when.rb +35 -0
- data/lib/fabulator/grammar/cursor.rb +118 -0
- data/lib/fabulator/grammar/expr/anchor.rb +28 -0
- data/lib/fabulator/grammar/expr/char_set.rb +67 -18
- data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
- data/lib/fabulator/grammar/expr/rule.rb +33 -28
- data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
- data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
- data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
- data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
- data/lib/fabulator/grammar/expr/sequence.rb +7 -1
- data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
- data/lib/fabulator/grammar/expr/text.rb +8 -0
- data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
- data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
- data/lib/fabulator/grammar/rule_parser.rb +667 -0
- data/lib/fabulator/grammar/token_parser.rb +733 -0
- data/rules.racc +249 -0
- data/tokens.racc +257 -0
- metadata +29 -12
- data/lib/fabulator/grammar/parser.rb +0 -548
- data/regex.racc +0 -183
@@ -0,0 +1,51 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Actions
      # Structural element describing one grammar rule: a named rule (within
      # a mode) holding a list of `when` choices plus optional actions.
      class Rule < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :name, :static => true
        attribute :mode, :default => "default", :static => true

        contains :when, :as => :choices

        has_actions

        # g - the owning grammar (stored in @grammar; may be nil).
        def initialize(g = nil)
          @grammar = g
          @choices = [ ]
        end

        # Tries every choice against the cursor and keeps the one with the
        # highest score (the first one wins a tie).
        #
        # Every choice is parsed from the same starting point: the attempt
        # block always returns nil so the cursor is rewound after each try.
        # Previously the cursor stayed advanced once a "best so far" choice
        # was recorded, so later choices were parsed from the wrong position.
        # The cursor state reached by the winning choice is remembered and
        # reinstated before that choice's actions run.
        #
        # cursor - the parse cursor (must respond to attempt, point, point=,
        #          and context).
        #
        # Returns the winning choice's parse result, or nil when no choice
        # matches (the cursor is then left where it started).
        def parse(cursor)
          best_attempt = nil
          @choices.each do |choice|
            cursor.attempt do |c|
              ret = choice.parse(c)
              unless ret.nil?
                score = choice.score(cursor.context, ret)
                if best_attempt.nil? || best_attempt[:score] < score
                  best_attempt = {
                    :score  => score,
                    :choice => choice,
                    :ret    => ret,
                    :point  => c.point
                  }
                end
              end
              nil # force the rewind so the next choice starts from the same spot
            end
          end
          return nil if best_attempt.nil?

          # Re-apply the cursor state produced by the winning choice.
          cursor.point = best_attempt[:point]

          choice = best_attempt[:choice]
          ret = best_attempt[:ret]
          if choice.has_actions?
            ctx = cursor.context.with_root(cursor.context.root.anon_node(nil))
            ctx.merge_data(ret)
            choice.run(ctx)
          end
          ret
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Actions
      # Structural element describing a token: a named pattern (within a
      # mode) whose `matches` attribute is compiled by the TokenParser.
      class Token < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :name,    :static => true
        attribute :mode,    :default => "default", :static => true
        attribute :matches, :static => true

        # Runs the inherited compilation, then compiles the `matches`
        # attribute text into @c_matches with a fresh TokenParser.
        def compile_xml(xml, ctx = nil)
          super

          @c_matches = Fabulator::Grammar::TokenParser.new.parse(self.matches)
        end

        # Delegates parsing at the cursor to the compiled pattern.
        def parse(cursor)
          @c_matches.parse(cursor)
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Actions
      # Structural element for one alternative of a rule: the `matches`
      # attribute is compiled by the RuleParser, and an optional `score`
      # expression ranks this alternative against its siblings.
      class When < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :matches, :static => true
        attribute :score,   :eval => true

        has_actions

        # Runs the inherited compilation, then compiles the `matches`
        # attribute text into @c_matches with a fresh RuleParser.
        def compile_xml(xml, ctx = nil)
          super

          @c_matches = Fabulator::Grammar::RuleParser.new.parse(self.matches)
        end

        # Delegates parsing at the cursor to the compiled rule body.
        def parse(cursor)
          @c_matches.parse(cursor)
        end

        # Scores a successful parse. Returns 0 when no score is set or when
        # evaluating it raises.
        #
        # NOTE(review): `self.score(ctx)` passes a single argument to this
        # two-argument method; unless the `attribute :score` macro arranges
        # otherwise, that raises ArgumentError, which the rescue turns into
        # 0 - confirm against Fabulator::Structural.
        def score(context, data)
          return 0 if @score.nil?

          ctx = context.with_root(context.root.anon_node(nil))
          ctx.merge_data(data)
          begin
            self.score(ctx).value
          rescue
            0
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Fabulator
  module Grammar
    # Tracks the current position in a source string while parsing, together
    # with the parse mode, the anchored flag, the active skip pattern and a
    # private data context.
    class Cursor
      attr_accessor :mode, :skip, :anchored
      attr_reader :context, :grammar

      # g   - the grammar used by find_rule
      # ctx - a context; the cursor works on a copy rooted at a new anonymous node
      # s   - the source string to parse
      def initialize(g, ctx, s)
        @source = s
        @grammar = g
        @curpos = 0
        @end = @source.length - 1
        @line = 0
        @col = 0
        @anchored = false
        @mode = :default
        @skip = nil
        @context = ctx.with_root(ctx.root.anon_node(nil))
      end

      # Current offset into the source.
      def pos
        @curpos
      end

      # Moves forward one position at a time until `pat` matches exactly at
      # the current position, or the end of the source is reached.
      #
      # \A is used instead of ^ because ^ also matches right after any
      # newline, which would report a match that really starts on a later
      # line of the remaining text.
      def resync(pat)
        until eof || @source[@curpos..@end] =~ %r{\A#{pat}}
          @curpos += 1
        end
      end

      # True once the position is past the last character.
      def eof
        @curpos > @end
      end

      # Advances by i positions; zero or negative values are ignored.
      def advance_position(i)
        @curpos += i if i > 0
      end

      # Snapshot of the restorable cursor state.
      def point
        {
          :curpos   => @curpos,
          :line     => @line,
          :col      => @col,
          :root     => @context.root,
          :mode     => @mode,
          :anchored => @anchored,
          :skip     => @skip
        }
      end

      # Restores a snapshot previously produced by #point.
      def point=(p)
        @curpos = p[:curpos]
        @line = p[:line]
        @col = p[:col]
        @mode = p[:mode]
        @anchored = p[:anchored]
        @skip = p[:skip]
        @context.root = p[:root]
      end

      # Yields the cursor to the block. If the block returns nil the cursor
      # state is rolled back to what it was before the call and nil is
      # returned; otherwise the block's value is returned and the state kept.
      def attempt
        saved = point
        ret = yield self
        if ret.nil?
          self.point = saved
          return nil
        end

        ret
      end

      # Looks a rule up in the current mode, falling back to the 'default'
      # mode when it is not found there.
      def find_rule(nom)
        r = @grammar.get_rule(@mode, nom)
        r = @grammar.get_rule('default', nom) if r.nil? && @mode.to_s != 'default'
        r
      end

      # The cursor's data context (same object as #context).
      def data
        @context
      end

      # Repeatedly applies the skip pattern at the current position and moves
      # past whatever it consumed.
      #
      # While skipping, the skip pattern is disabled (so it cannot recurse)
      # and the cursor is anchored. The block returns nil on purpose: that
      # makes #attempt restore skip/anchored/mode, after which only the new
      # position is applied. Previously the block returned the position, so
      # nothing was restored and the skip pattern stayed disabled after its
      # first use. The loop also stops when a match consumes nothing, so a
      # skip pattern that can match the empty string cannot loop forever.
      def do_skip
        return if @skip.nil?

        my_skip = @skip
        new_pos = @curpos
        attempt do |cursor|
          cursor.skip = nil
          cursor.anchored = true
          loop do
            before = cursor.pos
            break if my_skip.parse(cursor).nil? || cursor.pos == before
          end
          new_pos = cursor.pos
          nil
        end
        @curpos = new_pos
      end

      # Applies the skip pattern, then tries to match `regex` exactly at the
      # current position. On success the position moves past the match and
      # the matched text is returned; otherwise nil is returned.
      #
      # \A (not ^) keeps the match pinned to the current position even when
      # the remaining source spans several lines.
      def match_token(regex)
        res = nil
        do_skip
        if @source[@curpos..@end] =~ %r{\A(#{regex})}
          res = $1.to_s
          @curpos += res.length
        end
        res
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Expr
      # Zero-width assertion about the cursor position.
      class Anchor
        # t - one of :start_of_string, :start_of_line, :end_of_string,
        #     :end_of_line
        def initialize(t)
          @anchor = t
        end

        # Anchors never contribute a named value.
        def name
          nil
        end

        # Returns an empty hash when the assertion holds at the cursor's
        # position and nil otherwise. Only the string anchors are checked;
        # the line anchors currently never succeed.
        #
        # The `when x then` / newline forms are used instead of the `when x:`
        # colon form, which is a syntax error on Ruby 1.9 and later.
        def parse(source)
          case @anchor
          when :start_of_string
            source.pos == 0 ? {} : nil
          when :end_of_string
            source.eof ? {} : nil
          when :start_of_line, :end_of_line
            nil # line anchors are not implemented
          end
        end
      end
    end
  end
end
|
@@ -1,45 +1,94 @@
|
|
1
|
+
require 'bitset'
|
2
|
+
|
1
3
|
module Fabulator::Grammar::Expr
  # A set of character codes backed by a BitSet, built from a
  # bracket-expression style specification such as "a-z0-9_".
  class CharSet
    # cs - the specification: literal characters and `x-y` ranges. A leading
    #      '-' is taken literally.
    #
    # Character codes are obtained with String#[] on a single index, which
    # yields an integer on Ruby 1.8.
    # NOTE(review): on Ruby 1.9+ that expression yields a one-character
    # String instead - confirm what BitSet#on accepts before running there.
    def initialize(cs = "")
      @set = BitSet.new
      if cs[0..0] == '-'
        @set.on(('-')[0])
        cs = cs[1..cs.length-1]
      end
      bits = cs.split(/-/)  # to pull out ranges
      if bits.size == 1
        # no ranges at all: every character is a literal
        bits[0].each_char { |c| @set.on(c[0]) }
      elsif bits.size > 1
        # first character of a multi-character leading run is a literal
        if bits[0].size > 1
          @set.on(bits[0][0])
        end
        while bits.size > 1
          b = bits.shift
          # interior characters of the run are literals
          if b.size > 2
            b[1..b.size-2].each_char { |c| @set.on(c[0]) }
          end
          # last char of this run .. first char of the next run is a range
          @set.on(b[b.size-1] .. bits[0][0])
        end
        # Everything after the first character of the final run is a
        # literal. The slice runs through the last character; it used to
        # stop one short, silently dropping it (e.g. the 'z' in "a-fz").
        if bits[0].size > 1
          bits[0][1..bits[0].size-1].each_char { |c| @set.on(c[0]) }
        end
      end
    end

    # The underlying BitSet.
    def set
      @set
    end

    # Adds every member of c to this set; returns self.
    def or(c)
      @set = @set | c.set
      self
    end

    # Removes every member of c from this set; returns self.
    def but_not(c)
      @set = @set - c.set
      self
    end

    # for now, we restrict ourselves to 8-bit characters
    def universal
      @set.on(0..0xff)
    end

    # Builds a bracket-expression Regexp from the members reported by the
    # set's to_ary (ranges become `a-b`). An empty set yields %r{.}.
    def to_regex
      # want a compact set of ranges for the regex
      set_def = ''
      @set.to_ary.each do |r|
        if r.is_a?(Range)
          set_def << Regexp.quote(r.begin.to_i.chr) << '-' << Regexp.quote(r.end.to_i.chr)
        else
          set_def << Regexp.quote(r.to_i.chr)
        end
      end
      if set_def == ''
        return %r{.}
      else
        %r{[#{set_def}]}
      end
    end
  end

  # A CharSet initialised from a named character class ("alpha", "digit",
  # ...). Unknown names give an empty set.
  class CharClass < CharSet
    @@charsets = {
      'alnum' => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a ],
      'alpha' => [ 0x41 .. 0x5a, 0x61 .. 0x7a ],
      'ascii' => [ 0x00 .. 0x7f ],
      'blank' => [ 0x09, 0x20 ], # \t + space (tab is 0x09; 0x0b is \v)
      'cntrl' => [ 0x00 .. 0x1f, 0x7f ],
      'digit' => [ 0x30 .. 0x39 ],
      'graph' => [ 0x21 .. 0x7e ],
      'lower' => [ 0x61 .. 0x7a ],
      'print' => [ 0x20 .. 0x7e ],
      'space' => [ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x20 ], # \t\n\v\f\r + space
      'upper' => [ 0x41 .. 0x5a ],
      'word'  => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a, '_'[0] ],
      'xdigit'=> [ 0x30 .. 0x39, 0x41 .. 0x46, 0x61 .. 0x66 ],
      # NOTE(review): 0x0c is form feed; if carriage return was intended
      # this should be 0x0d - confirm.
      'nl'    => [ 0x0a, 0x0c ]
    }

    # cs - class name, matched case-insensitively.
    def initialize(cs)
      @set = BitSet.new.on(@@charsets[cs.downcase] || [])
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Fabulator
  module Grammar
    module Expr
      # Positive look-ahead: succeeds when the wrapped sequence matches at
      # the cursor, without consuming anything.
      class LookAhead
        def initialize(sequence)
          @sequence = sequence
        end

        # Look-aheads never contribute a named value.
        def name
          nil
        end

        # Returns {} if the sequence matches, nil otherwise. The attempt
        # block always yields nil so the cursor is rolled back either way.
        def parse(source)
          matched = false
          source.attempt do |cursor|
            matched = !@sequence.parse(cursor).nil?
            nil
          end
          matched ? {} : nil
        end
      end

      # Negative look-ahead: succeeds when the wrapped sequence does NOT
      # match at the cursor, without consuming anything.
      class NegLookAhead
        def initialize(sequence)
          @sequence = sequence
        end

        # Look-aheads never contribute a named value.
        def name
          nil
        end

        # Returns {} if the sequence fails to match, nil otherwise. The
        # attempt block always yields nil so the cursor is rolled back.
        def parse(source)
          matched = false
          source.attempt do |cursor|
            matched = !@sequence.parse(cursor).nil?
            nil
          end
          matched ? nil : {}
        end
      end

    end
  end
end
|
@@ -1,35 +1,40 @@
|
|
1
|
-
module Fabulator
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
@anchor_end = false
|
7
|
-
end
|
8
|
-
|
9
|
-
def anchor_start
|
10
|
-
@anchor_start = true
|
11
|
-
end
|
1
|
+
module Fabulator
|
2
|
+
module Grammar
|
3
|
+
module Expr
|
4
|
+
# An ordered list of alternatives that is tried at the cursor position.
class Rule
  attr_accessor :name

  def initialize
    @alternatives = [ ]
  end

  # Appends an alternative; alternatives are tried in insertion order.
  def add_alternative(a)
    @alternatives << a
  end

  # Parses with the first alternative that succeeds.
  #
  # When the cursor is anchored, the alternatives are tried only at the
  # current position. Otherwise the cursor is advanced one position at a
  # time, trying all alternatives at each position, until one succeeds or
  # the end of the source is reached.
  #
  # Returns the successful alternative's result, or nil when nothing
  # matches.
  def parse(s)
    return try_alternatives(s) if s.anchored

    until s.eof
      ret = try_alternatives(s)
      return ret unless ret.nil?
      s.advance_position(1)
    end
    nil
  end

  private

  # Tries each alternative at the cursor's current position with the
  # cursor anchored; returns the first non-nil result, or nil.
  def try_alternatives(s)
    @alternatives.each do |alternative|
      ret = s.attempt do |cursor|
        cursor.anchored = true
        alternative.parse(cursor)
      end
      return ret unless ret.nil?
    end
    nil
  end
end
|
34
39
|
end
|
35
40
|
end
|