fabulator-grammar 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. data/History.txt +22 -0
  2. data/Rakefile +3 -1
  3. data/VERSION +1 -1
  4. data/features/grammar.feature +116 -12
  5. data/features/step_definitions/expression_steps.rb +2 -2
  6. data/features/step_definitions/grammar_steps.rb +46 -2
  7. data/features/step_definitions/xml_steps.rb +5 -16
  8. data/features/support/env.rb +1 -0
  9. data/lib/fabulator-grammar.rb +1 -0
  10. data/lib/fabulator/grammar.rb +12 -3
  11. data/lib/fabulator/grammar/actions.rb +17 -7
  12. data/lib/fabulator/grammar/actions/context.rb +18 -0
  13. data/lib/fabulator/grammar/actions/grammar.rb +76 -0
  14. data/lib/fabulator/grammar/actions/rule.rb +51 -0
  15. data/lib/fabulator/grammar/actions/token.rb +27 -0
  16. data/lib/fabulator/grammar/actions/when.rb +35 -0
  17. data/lib/fabulator/grammar/cursor.rb +118 -0
  18. data/lib/fabulator/grammar/expr/anchor.rb +28 -0
  19. data/lib/fabulator/grammar/expr/char_set.rb +67 -18
  20. data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
  21. data/lib/fabulator/grammar/expr/rule.rb +33 -28
  22. data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
  23. data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
  24. data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
  25. data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
  26. data/lib/fabulator/grammar/expr/sequence.rb +7 -1
  27. data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
  28. data/lib/fabulator/grammar/expr/text.rb +8 -0
  29. data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
  30. data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
  31. data/lib/fabulator/grammar/rule_parser.rb +667 -0
  32. data/lib/fabulator/grammar/token_parser.rb +733 -0
  33. data/rules.racc +249 -0
  34. data/tokens.racc +257 -0
  35. metadata +29 -12
  36. data/lib/fabulator/grammar/parser.rb +0 -548
  37. data/regex.racc +0 -183
@@ -0,0 +1,51 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Actions
4
+ class Rule < Fabulator::Structural
5
+
6
+ namespace GRAMMAR_NS
7
+
8
+ attribute :name, :static => true
9
+ attribute :mode, :default => "default", :static => true
10
+
11
+ contains :when, :as => :choices
12
+
13
+ has_actions
14
+
15
+ def initialize(g = nil)
16
+ @grammar = g
17
+ @choices = [ ]
18
+ end
19
+
20
+ def parse(cursor)
21
+ # try each when...
22
+ best_attempt = nil
23
+ @choices.each do |choice|
24
+ cursor.attempt do |c|
25
+ ret = choice.parse(c)
26
+ if !ret.nil?
27
+ score = choice.score(cursor.context, ret)
28
+ if best_attempt.nil? || best_attempt[:score] < score
29
+ best_attempt = {
30
+ :score => score,
31
+ :choice => choice,
32
+ :ret => ret
33
+ }
34
+ end
35
+ end
36
+ end
37
+ end
38
+ return nil if best_attempt.nil?
39
+ choice = best_attempt[:choice]
40
+ ret = best_attempt[:ret]
41
+ if choice.has_actions?
42
+ ctx = cursor.context.with_root(cursor.context.root.anon_node(nil))
43
+ ctx.merge_data(ret)
44
+ choice.run(ctx)
45
+ end
46
+ return ret
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,27 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Actions
4
+ class Token < Fabulator::Structural
5
+
6
+ namespace GRAMMAR_NS
7
+
8
+ attribute :name, :static => true
9
+ attribute :mode, :default => "default", :static => true
10
+ attribute :matches, :static => true
11
+
12
+ def compile_xml(xml, ctx = nil)
13
+ super
14
+
15
+ parser = Fabulator::Grammar::TokenParser.new
16
+
17
+ # parse @matches
18
+ @c_matches = parser.parse(self.matches)
19
+ end
20
+
21
+ def parse(cursor)
22
+ @c_matches.parse(cursor)
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,35 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Actions
4
+ class When < Fabulator::Structural
5
+
6
+ namespace GRAMMAR_NS
7
+
8
+ attribute :matches, :static => true
9
+ attribute :score, :eval => true
10
+
11
+ has_actions
12
+
13
+ def compile_xml(xml, ctx = nil)
14
+ super
15
+
16
+ parser = Fabulator::Grammar::RuleParser.new
17
+
18
+ # parse @matches
19
+ @c_matches = parser.parse(self.matches)
20
+ end
21
+
22
+ def parse(cursor)
23
+ @c_matches.parse(cursor)
24
+ end
25
+
26
+ def score(context, data)
27
+ return 0 if @score.nil?
28
+ ctx = context.with_root(context.root.anon_node(nil))
29
+ ctx.merge_data(data)
30
+ (self.score(ctx).value rescue 0)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,118 @@
1
+ module Fabulator
2
+ module Grammar
3
+ class Cursor
4
+ attr_accessor :mode, :skip
5
+
6
+ def initialize(g,ctx,s)
7
+ @source = s
8
+ @grammar = g
9
+ @curpos = 0
10
+ @end = @source.length-1
11
+ @line = 0
12
+ @col = 0
13
+ @anchored = false
14
+ @mode = :default
15
+ @skip = nil
16
+ @context = ctx.with_root(ctx.root.anon_node(nil))
17
+ end
18
+
19
+ def context
20
+ @context
21
+ end
22
+
23
+ def pos
24
+ @curpos
25
+ end
26
+
27
+ def resync(pat)
28
+ until self.eof || @source[@curpos..@source.length-1] =~ %r{^#{pat}}
29
+ @curpos += 1
30
+ end
31
+ end
32
+
33
+ def eof
34
+ @curpos > @end
35
+ end
36
+
37
+ def advance_position(i)
38
+ @curpos += i if i > 0
39
+ end
40
+
41
+ def anchored
42
+ @anchored
43
+ end
44
+
45
+ def anchored=(t)
46
+ @anchored = t
47
+ end
48
+
49
+ def grammar
50
+ @grammar
51
+ end
52
+
53
+ def point
54
+ { :curpos => @curpos, :line => @line, :col => @col, :root => @context.root, :mode => @mode, :anchored => @anchored, :skip => @skip }
55
+ end
56
+
57
+ def point=(p)
58
+ @curpos = p[:curpos]
59
+ @line = p[:line]
60
+ @col = p[:col]
61
+ @mode = p[:mode]
62
+ @anchored = p[:anchored]
63
+ @skip = p[:skip]
64
+ @context.root = p[:root]
65
+ end
66
+
67
+ def attempt(&block)
68
+ saved = self.point
69
+ ret = yield self
70
+ if ret.nil?
71
+ self.point = saved
72
+ return nil
73
+ end
74
+
75
+ return ret
76
+ end
77
+
78
+ def find_rule(nom)
79
+ r = @grammar.get_rule(@mode, nom)
80
+ if r.nil? && @mode.to_s != 'default'
81
+ r = @grammar.get_rule('default', nom)
82
+ end
83
+ r
84
+ end
85
+
86
+ def data
87
+ @context
88
+ end
89
+
90
+ def do_skip
91
+ if !@skip.nil?
92
+ my_skip = @skip
93
+ new_pos = @curpos
94
+ self.attempt do |cursor|
95
+ cursor.skip = nil
96
+ cursor.anchored
97
+ r = my_skip.parse(cursor)
98
+ while !r.nil?
99
+ r = my_skip.parse(cursor)
100
+ end
101
+ new_pos = cursor.pos
102
+ end
103
+ @curpos = new_pos
104
+ end
105
+ end
106
+
107
+ def match_token(regex)
108
+ res = nil
109
+ do_skip
110
+ if @source[@curpos .. @end] =~ %r{^(#{regex})}
111
+ res = $1.to_s
112
+ @curpos += res.length
113
+ end
114
+ res
115
+ end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,28 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Expr
4
+ class Anchor
5
+ def initialize(t)
6
+ @anchor = t
7
+ end
8
+
9
+ def name
10
+ nil
11
+ end
12
+
13
+ def parse(source)
14
+ ret = nil
15
+ case @anchor
16
+ when :start_of_string:
17
+ ret = source.pos == 0 ? {} : nil
18
+ when :start_of_line:
19
+ when :end_of_string:
20
+ ret = source.eof ? {} : nil
21
+ when :end_of_line:
22
+ end
23
+ ret
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -1,45 +1,94 @@
1
+ require 'bitset'
2
+
1
3
  module Fabulator::Grammar::Expr
2
4
  class CharSet
3
- def initialize(cs)
4
- chars = ""
5
- ranges = ""
5
+ def initialize(cs = "")
6
+ @set = BitSet.new
6
7
  if cs[0..0] == '-'
7
- chars = '-'
8
+ @set.on(('-')[0])
8
9
  cs = cs[1..cs.length-1]
9
10
  end
10
11
  bits = cs.split(/-/) # to pull out ranges
11
12
  if bits.size == 1
12
- chars = bits[0]
13
- else
13
+ bits[0].each_char{ |c|
14
+ @set.on(c[0])
15
+ }
16
+ elsif bits.size > 1
14
17
  if bits[0].size > 1
15
- chars += b[0..0]
18
+ @set.on(bits[0][0])
16
19
  end
17
20
  while(bits.size > 1)
18
21
  b = bits.shift
19
22
  if b.size > 2
20
- chars += b[1..b.size-2]
23
+ b[1..b.size-2].each_char { |c| @set.on(c[0]) }
21
24
  end
22
- ranges += Regexp.quote(b[b.size-1 .. b.size-1]) + '-' + Regexp.quote(bits[0][0..0])
25
+ @set.on(b[b.size-1] .. bits[0][0])
23
26
  end
24
27
  if bits[0].size > 1
25
- chars += bits[0][1..bits[0].size-1]
28
+ bits[0][1..bits[0].size-2].each_char { |c|
29
+ @set.on(c[0])
30
+ }
26
31
  end
27
32
  end
28
- chars = chars.collect{ |cc| Regexp.quote(cc) }.join('')
29
- @set = chars + ranges
30
- @inverted = false
31
33
  end
32
34
 
33
- def inverted
34
- @inverted = true
35
+ def set
36
+ @set
37
+ end
38
+
39
+ def or(c)
40
+ @set = @set | c.set
41
+ self
42
+ end
43
+
44
+ def but_not(c)
45
+ @set = @set - c.set
46
+ self
47
+ end
48
+
49
+ # for now, we restrict ourselves to 8-bit characters
50
+ def universal
51
+ @set.on(0..0xff)
35
52
  end
36
53
 
37
54
  def to_regex
38
- if @set != ''
39
- Regexp.compile("[" + (@inverted ? '^' : '') + @set + "]")
55
+ # want a compact set of ranges for the regex
56
+ set_def = ''
57
+ @set.to_ary.each do |r|
58
+ if r.is_a?(Range)
59
+ set_def += Regexp.quote(r.begin.to_i.chr) + '-' + Regexp.quote(r.end.to_i.chr)
60
+ else
61
+ set_def += Regexp.quote(r.to_i.chr)
62
+ end
63
+ end
64
+ if set_def == ''
65
+ return %r{.}
40
66
  else
41
- %r{}
67
+ %r{[#{set_def}]}
42
68
  end
43
69
  end
44
70
  end
71
+
72
+ class CharClass < CharSet
73
+ @@charsets = {
74
+ 'alnum' => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a ],
75
+ 'alpha' => [ 0x41 .. 0x5a, 0x61 .. 0x7a ],
76
+ 'ascii' => [ 0x00 .. 0x7f ],
77
+ 'blank' => [ 0x0b, 0x20 ], # \t + space
78
+ 'cntrl' => [ 0x00 .. 0x1f, 0x7f ],
79
+ 'digit' => [ 0x30 .. 0x39 ],
80
+ 'graph' => [ 0x21 .. 0x7e ],
81
+ 'lower' => [ 0x61 .. 0x7a ],
82
+ 'print' => [ 0x20 .. 0x7e ],
83
+ 'space' => [ 0x0a, 0x0b, 0x0c, 0x0f, 0x20 ], # \t\r\n\v\f + space
84
+ 'upper' => [ 0x41 .. 0x5a ],
85
+ 'word' => [ 0x30 .. 0x39, 0x41 .. 0x5a, 0x61 .. 0x7a, '_'[0] ],
86
+ 'xdigit'=> [ 0x30 .. 0x39, 0x41 .. 0x46, 0x61 .. 0x66 ],
87
+ 'nl' => [ 0x0a, 0x0c ]
88
+ }
89
+
90
+ def initialize(cs)
91
+ @set = BitSet.new.on(@@charsets[cs.downcase] || [])
92
+ end
93
+ end
45
94
  end
@@ -0,0 +1,44 @@
1
+ module Fabulator
2
+ module Grammar
3
+ module Expr
4
+ class LookAhead
5
+ def initialize(sequence)
6
+ @sequence = sequence
7
+ end
8
+
9
+ def name
10
+ nil
11
+ end
12
+
13
+ def parse(source)
14
+ ret = nil
15
+ source.attempt do |c|
16
+ ret = @sequence.parse(c)
17
+ nil
18
+ end
19
+ ret.nil? ? nil : {}
20
+ end
21
+ end
22
+
23
+ class NegLookAhead
24
+ def initialize(sequence)
25
+ @sequence = sequence
26
+ end
27
+
28
+ def name
29
+ nil
30
+ end
31
+
32
+ def parse(source)
33
+ ret = nil
34
+ source.attempt do |c|
35
+ ret = @sequence.parse(c)
36
+ nil
37
+ end
38
+ ret.nil? ? {} : nil
39
+ end
40
+ end
41
+
42
+ end
43
+ end
44
+ end
@@ -1,35 +1,40 @@
1
- module Fabulator::Grammar::Expr
2
- class Rule
3
- def initialize
4
- @sequences = [ ]
5
- @anchor_start = false
6
- @anchor_end = false
7
- end
8
-
9
- def anchor_start
10
- @anchor_start = true
11
- end
1
+ module Fabulator
2
+ module Grammar
3
+ module Expr
4
+ class Rule
5
+ attr_accessor :name
12
6
 
13
- def anchor_end
14
- @anchor_end = true
15
- end
7
+ def initialize
8
+ @alternatives = [ ]
9
+ end
16
10
 
17
- def add_sequence(s)
18
- @sequences << s
19
- end
11
+ def add_alternative(a)
12
+ @alternatives << a
13
+ end
20
14
 
21
- def to_regex
22
- r = %r{#{@sequences.collect{ |s| s.to_regex }}}
23
- if @anchor_start
24
- if @anchor_end
25
- %r{^#{r}$}
26
- else
27
- %r{^#{r}}
15
+ def parse(s)
16
+ if s.anchored
17
+ @alternatives.each do |alternative|
18
+ ret = s.attempt { |cursor|
19
+ cursor.anchored = true
20
+ alternative.parse(cursor)
21
+ }
22
+ return ret unless ret.nil?
23
+ end
24
+ else
25
+ while !s.eof
26
+ @alternatives.each do |alternative|
27
+ ret = s.attempt { |cursor|
28
+ cursor.anchored = true
29
+ alternative.parse(cursor)
30
+ }
31
+ return ret unless ret.nil?
32
+ end
33
+ s.advance_position(1)
34
+ end
35
+ end
36
+ return nil
28
37
  end
29
- elsif @anchor_end
30
- %r{#{r}$}
31
- else
32
- r
33
38
  end
34
39
  end
35
40
  end