fabulator-grammar 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/History.txt +22 -0
  2. data/Rakefile +3 -1
  3. data/VERSION +1 -1
  4. data/features/grammar.feature +116 -12
  5. data/features/step_definitions/expression_steps.rb +2 -2
  6. data/features/step_definitions/grammar_steps.rb +46 -2
  7. data/features/step_definitions/xml_steps.rb +5 -16
  8. data/features/support/env.rb +1 -0
  9. data/lib/fabulator-grammar.rb +1 -0
  10. data/lib/fabulator/grammar.rb +12 -3
  11. data/lib/fabulator/grammar/actions.rb +17 -7
  12. data/lib/fabulator/grammar/actions/context.rb +18 -0
  13. data/lib/fabulator/grammar/actions/grammar.rb +76 -0
  14. data/lib/fabulator/grammar/actions/rule.rb +51 -0
  15. data/lib/fabulator/grammar/actions/token.rb +27 -0
  16. data/lib/fabulator/grammar/actions/when.rb +35 -0
  17. data/lib/fabulator/grammar/cursor.rb +118 -0
  18. data/lib/fabulator/grammar/expr/anchor.rb +28 -0
  19. data/lib/fabulator/grammar/expr/char_set.rb +67 -18
  20. data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
  21. data/lib/fabulator/grammar/expr/rule.rb +33 -28
  22. data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
  23. data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
  24. data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
  25. data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
  26. data/lib/fabulator/grammar/expr/sequence.rb +7 -1
  27. data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
  28. data/lib/fabulator/grammar/expr/text.rb +8 -0
  29. data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
  30. data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
  31. data/lib/fabulator/grammar/rule_parser.rb +667 -0
  32. data/lib/fabulator/grammar/token_parser.rb +733 -0
  33. data/rules.racc +249 -0
  34. data/tokens.racc +257 -0
  35. metadata +29 -12
  36. data/lib/fabulator/grammar/parser.rb +0 -548
  37. data/regex.racc +0 -183
@@ -0,0 +1,51 @@
1
module Fabulator
  module Grammar
    module Actions
      # Structural element for a grammar rule.  A rule carries a static
      # +name+ and +mode+ and contains a list of <when> choices (exposed as
      # +@choices+); parsing a rule means trying every choice and keeping
      # the one with the highest score.
      class Rule < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :name, :static => true
        attribute :mode, :default => "default", :static => true

        contains :when, :as => :choices

        has_actions

        # g:: the owning grammar (stored, not otherwise used in this class).
        def initialize(g = nil)
          @grammar = g
          @choices = [ ]
        end

        # Tries every choice against +cursor+ and returns the data produced
        # by the best-scoring one, or nil when no choice matches.
        #
        # Every choice is parsed from the SAME starting point.  Previously
        # the cursor stayed advanced after each new best attempt, so later
        # choices were parsed from wherever the earlier winner had stopped.
        # On success the cursor is left where the winning choice finished;
        # on failure it is put back where it started.  Ties keep the
        # earliest choice.
        #
        # If the winning choice has actions, they are run against a fresh
        # anonymous context node populated with the parsed data.
        def parse(cursor)
          start = cursor.point
          best_attempt = nil

          @choices.each do |choice|
            cursor.point = start          # rewind before every alternative
            ret = choice.parse(cursor)
            next if ret.nil?

            score = choice.score(cursor.context, ret)
            if best_attempt.nil? || best_attempt[:score] < score
              best_attempt = {
                :score  => score,
                :choice => choice,
                :ret    => ret,
                :point  => cursor.point   # where this alternative ended
              }
            end
          end

          if best_attempt.nil?
            cursor.point = start
            return nil
          end

          cursor.point = best_attempt[:point]
          choice = best_attempt[:choice]
          ret = best_attempt[:ret]
          if choice.has_actions?
            ctx = cursor.context.with_root(cursor.context.root.anon_node(nil))
            ctx.merge_data(ret)
            choice.run(ctx)
          end
          ret
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Fabulator
  module Grammar
    module Actions
      # Structural element for a token definition.  The static +matches+
      # attribute holds the token expression text; it is compiled once by
      # TokenParser when the XML is compiled.
      class Token < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :name, :static => true
        attribute :mode, :default => "default", :static => true
        attribute :matches, :static => true

        # Runs the inherited XML compilation, then compiles the +matches+
        # text into a parseable expression kept in @c_matches.
        def compile_xml(xml, ctx = nil)
          super

          @c_matches = Fabulator::Grammar::TokenParser.new.parse(matches)
        end

        # Delegates to the compiled token expression.
        def parse(cursor)
          compiled = @c_matches
          compiled.parse(cursor)
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module Fabulator
  module Grammar
    module Actions
      # One alternative (<when>) of a grammar rule.  +matches+ is the rule
      # expression text, compiled by RuleParser; +score+ is an optional
      # evaluated attribute used to rank competing alternatives.
      class When < Fabulator::Structural

        namespace GRAMMAR_NS

        attribute :matches, :static => true
        attribute :score, :eval => true

        # The two-argument #score below replaces whatever reader the
        # +attribute+ declaration generated, so keep that reader reachable
        # under another name first.
        # NOTE(review): assumes `attribute :score, :eval => true` defines a
        # one-argument reader `score(ctx)` -- confirm against
        # Fabulator::Structural.  If it does not, nothing is aliased and
        # #score keeps returning 0 exactly as before.
        alias_method :score_expression, :score if method_defined?(:score)

        has_actions

        # Runs the inherited XML compilation, then compiles the +matches+
        # text into a parseable expression kept in @c_matches.
        def compile_xml(xml, ctx = nil)
          super

          parser = Fabulator::Grammar::RuleParser.new

          # parse @matches
          @c_matches = parser.parse(self.matches)
        end

        # Delegates to the compiled rule expression.
        def parse(cursor)
          @c_matches.parse(cursor)
        end

        # Scores this alternative for the parsed +data+.
        #
        # Returns 0 when no score expression was given or when evaluating
        # it fails.  Otherwise the expression is evaluated against a fresh
        # anonymous node holding +data+ and its +value+ is returned.
        #
        # The previous body called `self.score(ctx)`, which resolved to this
        # very method with the wrong number of arguments; the resulting
        # ArgumentError was swallowed by `rescue 0`, so every alternative
        # with a score expression was scored 0.
        def score(context, data)
          return 0 if @score.nil?
          ctx = context.with_root(context.root.anon_node(nil))
          ctx.merge_data(data)
          return 0 unless respond_to?(:score_expression)
          begin
            # NOTE(review): `.value` is kept from the original call; confirm
            # the evaluated attribute returns a single node.
            score_expression(ctx).value
          rescue StandardError
            # best effort, as before: an unscorable alternative ranks as 0
            0
          end
        end
      end
    end
  end
end
@@ -0,0 +1,118 @@
1
module Fabulator
  module Grammar
    # Read position over a source string plus the parse state that travels
    # with it: current mode, skip expression, anchoring flag and a working
    # context rooted at a fresh anonymous node.
    #
    # @line and @col are initialised and saved/restored with the point but
    # are never advanced by this class.
    class Cursor
      attr_accessor :mode, :skip, :anchored
      attr_reader :context, :grammar

      # g::   grammar used by #find_rule
      # ctx:: context; a copy rooted at a new anonymous node is kept
      # s::   the source string to parse
      def initialize(g,ctx,s)
        @source = s
        @grammar = g
        @curpos = 0
        @end = @source.length-1
        @line = 0
        @col = 0
        @anchored = false
        @mode = :default
        @skip = nil
        @context = ctx.with_root(ctx.root.anon_node(nil))
      end

      # Current offset into the source.
      def pos
        @curpos
      end

      # Same object as #context.
      def data
        @context
      end

      # Advances until the text AT the cursor matches +pat+ or the end of
      # the source is reached.  The pattern is anchored with \A and grouped:
      # `^` would also match at the start of any later line, and an
      # ungrouped alternation would escape the anchor.
      def resync(pat)
        target = %r{\A(?:#{pat})}
        @curpos += 1 until eof || @source[@curpos..-1] =~ target
      end

      # True once the cursor is past the last character.
      def eof
        @curpos > @end
      end

      # Moves forward by +i+ characters; non-positive values are ignored.
      def advance_position(i)
        @curpos += i if i > 0
      end

      # Snapshot of everything #point= can restore.
      def point
        { :curpos => @curpos, :line => @line, :col => @col, :root => @context.root, :mode => @mode, :anchored => @anchored, :skip => @skip }
      end

      # Restores a snapshot taken with #point.
      def point=(p)
        @curpos = p[:curpos]
        @line = p[:line]
        @col = p[:col]
        @mode = p[:mode]
        @anchored = p[:anchored]
        @skip = p[:skip]
        @context.root = p[:root]
      end

      # Yields the cursor to the block.  A nil result rolls the cursor back
      # to the state it had before the block ran; any other result is
      # returned with the cursor left as the block moved it.
      def attempt(&block)
        saved = self.point
        ret = yield self
        if ret.nil?
          self.point = saved
          return nil
        end

        return ret
      end

      # Looks a rule up in the current mode, falling back to 'default'.
      def find_rule(nom)
        r = @grammar.get_rule(@mode, nom)
        if r.nil? && @mode.to_s != 'default'
          r = @grammar.get_rule('default', nom)
        end
        r
      end

      # Consumes everything the skip expression matches at the cursor.
      #
      # While skipping, the skip expression is switched off (so it cannot
      # recurse through #match_token) and the cursor is anchored (so the
      # skip expression cannot scan ahead over real input).  The block ends
      # with nil so that #attempt rolls all of that back; only the new
      # position is kept.  Previously `cursor.anchored` was a no-op and the
      # block returned a non-nil value, which left @skip permanently nil
      # after the first skip.
      def do_skip
        return if @skip.nil?

        my_skip = @skip
        new_pos = @curpos
        self.attempt do |cursor|
          cursor.skip = nil
          cursor.anchored = true
          loop do
            before = cursor.pos
            # stop on failure, or on a zero-width match that would never end
            break if my_skip.parse(cursor).nil? || cursor.pos == before
          end
          new_pos = cursor.pos
          nil
        end
        @curpos = new_pos
      end

      # Skips ignorable input, then matches +regex+ exactly at the cursor.
      # Returns the matched text and advances past it, or nil (cursor left
      # after the skipped input) when there is no match.  \A is used rather
      # than `^` so a token on a later line cannot match early.
      def match_token(regex)
        res = nil
        do_skip
        if @source[@curpos..-1].to_s =~ %r{\A(#{regex})}
          res = Regexp.last_match(1).to_s
          @curpos += res.length
        end
        res
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Fabulator
  module Grammar
    module Expr
      # Zero-width position test inside a token expression.
      class Anchor
        # t:: one of :start_of_string, :start_of_line, :end_of_string,
        #     :end_of_line
        def initialize(t)
          @anchor = t
        end

        # Anchors capture nothing, so they have no name.
        def name
          nil
        end

        # Returns an empty hash when the anchor holds at the cursor
        # +source+, nil otherwise.  Consumes no input.
        #
        # The `when value:` colon form used before was removed in Ruby 1.9;
        # plain `when` clauses work on every version.
        def parse(source)
          case @anchor
          when :start_of_string
            source.pos == 0 ? {} : nil
          when :end_of_string
            source.eof ? {} : nil
          when :start_of_line, :end_of_line
            # Not implemented: as before, line anchors never match.
            nil
          else
            nil
          end
        end
      end
    end
  end
end
@@ -1,45 +1,94 @@
1
require 'bitset'

module Fabulator::Grammar::Expr
  # A set of 8-bit character codes kept in a BitSet and rendered as a regex
  # character class on demand.
  class CharSet
    # cs:: character-class body such as "a-z_" or "-abc".  A leading '-' is
    #      a literal dash; every other '-' joins the characters on either
    #      side of it into a range.
    def initialize(cs = "")
      @set = BitSet.new
      if cs[0..0] == '-'
        turn_on('-')
        cs = cs[1..cs.length-1]
      end
      bits = cs.split(/-/) # to pull out ranges
      if bits.size == 1
        turn_on(bits[0])
      elsif bits.size > 1
        # The first piece's leading character is a plain literal (its last
        # character opens the first range).
        turn_on(bits[0][0..0]) if bits[0].size > 1
        while bits.size > 1
          b = bits.shift
          # characters strictly between the two range endpoints are literals
          turn_on(b[1..-2]) if b.size > 2
          @set.on(code_of(b[-1..-1]) .. code_of(bits[0][0..0]))
        end
        # Everything after the final range end is a literal.  This used to
        # stop one character short, so "a-z_" lost its underscore.
        turn_on(bits[0][1..-1]) if bits[0].size > 1
      end
    end

    # The underlying BitSet.
    def set
      @set
    end

    # Adds every character of the set +c+; returns self for chaining.
    def or(c)
      @set = @set | c.set
      self
    end

    # Removes every character of the set +c+; returns self for chaining.
    def but_not(c)
      @set = @set - c.set
      self
    end

    # for now, we restrict ourselves to 8-bit characters
    def universal
      @set.on(0..0xff)
    end

    # Renders the set as a compact regex character class.  An empty set
    # renders as /./ (any character).
    def to_regex
      # want a compact set of ranges for the regex
      set_def = ''
      @set.to_ary.each do |r|
        if r.is_a?(Range)
          set_def << Regexp.quote(r.begin.to_i.chr) << '-' << Regexp.quote(r.end.to_i.chr)
        else
          set_def << Regexp.quote(r.to_i.chr)
        end
      end
      if set_def == ''
        %r{.}
      else
        %r{[#{set_def}]}
      end
    end

    private

    # Byte value of the first character of +str+.  unpack is used because
    # String#[] with an integer only returns a character code on Ruby 1.8.
    def code_of(str)
      str.unpack('C')[0]
    end

    # Switches on every byte of +str+.
    def turn_on(str)
      str.unpack('C*').each { |code| @set.on(code) }
    end
  end

  # A CharSet preloaded from a named class ("alpha", "digit", ...).  Names
  # are matched case-insensitively; an unknown name gives an empty set.
  class CharClass < CharSet
    @@charsets = {
      'alnum' => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a ],
      'alpha' => [ 0x41 .. 0x5a, 0x61 .. 0x7a ],
      'ascii' => [ 0x00 .. 0x7f ],
      'blank' => [ 0x09, 0x20 ], # \t + space (was 0x0b, which is \v)
      'cntrl' => [ 0x00 .. 0x1f, 0x7f ],
      'digit' => [ 0x30 .. 0x39 ],
      'graph' => [ 0x21 .. 0x7e ],
      'lower' => [ 0x61 .. 0x7a ],
      'print' => [ 0x20 .. 0x7e ],
      'space' => [ 0x09 .. 0x0d, 0x20 ], # \t\n\v\f\r + space (was missing \t and \r)
      'upper' => [ 0x41 .. 0x5a ],
      'word' => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a, 0x5f ], # 0x5f is '_'
      'xdigit'=> [ 0x30 .. 0x39, 0x41 .. 0x46, 0x61 .. 0x66 ],
      # NOTE(review): 0x0c is form feed; 0x0d (\r) may have been intended.
      'nl' => [ 0x0a, 0x0c ]
    }

    def initialize(cs)
      @set = BitSet.new.on(@@charsets[cs.downcase] || [])
    end
  end
end
@@ -0,0 +1,44 @@
1
module Fabulator
  module Grammar
    module Expr
      # Positive look-ahead: succeeds when the wrapped sequence would match
      # at the cursor.  Never consumes input.
      class LookAhead
        def initialize(sequence)
          @sequence = sequence
        end

        # Look-aheads capture nothing, so they have no name.
        def name
          nil
        end

        # Returns {} if the sequence matches at +source+, nil otherwise.
        def parse(source)
          matched = false
          source.attempt do |probe|
            matched = !@sequence.parse(probe).nil?
            nil # a nil block result makes the cursor roll back
          end
          matched ? {} : nil
        end
      end

      # Negative look-ahead: succeeds when the wrapped sequence would NOT
      # match at the cursor.  Never consumes input.
      class NegLookAhead
        def initialize(sequence)
          @sequence = sequence
        end

        # Look-aheads capture nothing, so they have no name.
        def name
          nil
        end

        # Returns {} if the sequence fails at +source+, nil if it matches.
        def parse(source)
          matched = false
          source.attempt do |probe|
            matched = !@sequence.parse(probe).nil?
            nil # a nil block result makes the cursor roll back
          end
          matched ? nil : {}
        end
      end

    end
  end
end
@@ -1,35 +1,40 @@
1
- module Fabulator::Grammar::Expr
2
- class Rule
3
- def initialize
4
- @sequences = [ ]
5
- @anchor_start = false
6
- @anchor_end = false
7
- end
8
-
9
- def anchor_start
10
- @anchor_start = true
11
- end
1
+ module Fabulator
2
+ module Grammar
3
+ module Expr
4
+ class Rule
5
+ attr_accessor :name
12
6
 
13
- def anchor_end
14
- @anchor_end = true
15
- end
7
+ def initialize
8
+ @alternatives = [ ]
9
+ end
16
10
 
17
- def add_sequence(s)
18
- @sequences << s
19
- end
11
+ def add_alternative(a)
12
+ @alternatives << a
13
+ end
20
14
 
21
- def to_regex
22
- r = %r{#{@sequences.collect{ |s| s.to_regex }}}
23
- if @anchor_start
24
- if @anchor_end
25
- %r{^#{r}$}
26
- else
27
- %r{^#{r}}
15
+ def parse(s)
16
+ if s.anchored
17
+ @alternatives.each do |alternative|
18
+ ret = s.attempt { |cursor|
19
+ cursor.anchored = true
20
+ alternative.parse(cursor)
21
+ }
22
+ return ret unless ret.nil?
23
+ end
24
+ else
25
+ while !s.eof
26
+ @alternatives.each do |alternative|
27
+ ret = s.attempt { |cursor|
28
+ cursor.anchored = true
29
+ alternative.parse(cursor)
30
+ }
31
+ return ret unless ret.nil?
32
+ end
33
+ s.advance_position(1)
34
+ end
35
+ end
36
+ return nil
28
37
  end
29
- elsif @anchor_end
30
- %r{#{r}$}
31
- else
32
- r
33
38
  end
34
39
  end
35
40
  end