fabulator-grammar 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/History.txt +22 -0
  2. data/Rakefile +3 -1
  3. data/VERSION +1 -1
  4. data/features/grammar.feature +116 -12
  5. data/features/step_definitions/expression_steps.rb +2 -2
  6. data/features/step_definitions/grammar_steps.rb +46 -2
  7. data/features/step_definitions/xml_steps.rb +5 -16
  8. data/features/support/env.rb +1 -0
  9. data/lib/fabulator-grammar.rb +1 -0
  10. data/lib/fabulator/grammar.rb +12 -3
  11. data/lib/fabulator/grammar/actions.rb +17 -7
  12. data/lib/fabulator/grammar/actions/context.rb +18 -0
  13. data/lib/fabulator/grammar/actions/grammar.rb +76 -0
  14. data/lib/fabulator/grammar/actions/rule.rb +51 -0
  15. data/lib/fabulator/grammar/actions/token.rb +27 -0
  16. data/lib/fabulator/grammar/actions/when.rb +35 -0
  17. data/lib/fabulator/grammar/cursor.rb +118 -0
  18. data/lib/fabulator/grammar/expr/anchor.rb +28 -0
  19. data/lib/fabulator/grammar/expr/char_set.rb +67 -18
  20. data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
  21. data/lib/fabulator/grammar/expr/rule.rb +33 -28
  22. data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
  23. data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
  24. data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
  25. data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
  26. data/lib/fabulator/grammar/expr/sequence.rb +7 -1
  27. data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
  28. data/lib/fabulator/grammar/expr/text.rb +8 -0
  29. data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
  30. data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
  31. data/lib/fabulator/grammar/rule_parser.rb +667 -0
  32. data/lib/fabulator/grammar/token_parser.rb +733 -0
  33. data/rules.racc +249 -0
  34. data/tokens.racc +257 -0
  35. metadata +29 -12
  36. data/lib/fabulator/grammar/parser.rb +0 -548
  37. data/regex.racc +0 -183
data/regex.racc DELETED
@@ -1,183 +0,0 @@
1
# Racc grammar for a regex-like rule notation. The semantic actions build a
# tree of Fabulator::Grammar::Expr objects; the terminals (PIPE, CARET,
# NCNAME, TEXT, INTEGER, ...) are the token symbols emitted by #next_token
# in the inner section of this file.
class Fabulator::Grammar::Parser

  start rules

rule
  # One or more anchored rules separated by '|', collected as alternatives.
  rules: anchored_rule { result = Fabulator::Grammar::Expr::Rules.new; result.add_alternative(val[0]) }
    | rules PIPE anchored_rule { result = val[0]; result.add_alternative(val[2]) }

  # A rule with an optional leading '^' and/or trailing '$' anchor.
  anchored_rule: rule { result = val[0] }
    | CARET rule { result = val[1]; result.anchor_start }
    | rule DOLLAR { result = val[0]; result.anchor_end }
    | CARET rule DOLLAR { result = val[1]; result.anchor_start; result.anchor_end }

  # A possibly empty run of sequences; the empty production creates the Rule
  # that the following sequences are appended to.
  rule: { result = Fabulator::Grammar::Expr::Rule.new; }
    | rule sequence { result = val[0]; result.add_sequence(val[1]); }

  # One matchable item, optionally followed by a repetition qualifier.
  sequence: sub_sequence sequence_qualifiers { result = Fabulator::Grammar::Expr::Sequence.new(val[0], val[1]) }
    | sub_sequence { result = Fabulator::Grammar::Expr::Sequence.new(val[0]) }

  # <name> rule reference, literal text, '.', a parenthesised group, or a
  # (possibly inverted) bracketed character set.
  sub_sequence: LT qname GT { result = Fabulator::Grammar::Expr::RuleRef.new(val[1]) }
    | text { result = Fabulator::Grammar::Expr::Text.new(val[0]) }
    | DOT { result = Fabulator::Grammar::Expr::Any.new }
    | LP rules RP { result = val[1] }
    | LB text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[1]) }
    | LB CARET text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[2]); result.inverted }

  # Any word-like token is usable as literal text; the value stays a String.
  text: qname { result = val[0] }
    | TEXT { result = val[0] }
    | INTEGER { result = val[0] }

  # A bare name or a prefix:name pair, returned as a single String.
  qname: NCNAME { result = val[0] }
    | NCNAME COLON NCNAME { result = val[0] + ':' + val[2] }

  # Repetition qualifiers, returned as an Array handed to Expr::Sequence.new.
  # A trailing '?' adds the :min flag.
  # NOTE(review): :min is appended for '*', '+' and '?' but prepended for the
  # brace forms; the ordering is left as-is because the consumer is not
  # visible from this file.
  # Numeric bounds are always converted with to_i; an open upper bound is ''.
  sequence_qualifiers: STAR { result = [ :zero_or_more ] }
    | STAR QUESTION { result = [ :zero_or_more, :min ] }
    | PLUS { result = [ :one_or_more ] }
    | PLUS QUESTION { result = [ :one_or_more, :min ] }
    | QUESTION { result = [ :zero_or_one ] }
    | QUESTION QUESTION { result = [ :zero_or_one, :min ] }
    | LC INTEGER RC { result = [ :exact, val[1].to_i ] }
    | LC INTEGER COMMA INTEGER RC { result = [ :range, val[1].to_i, val[3].to_i ] }
    | LC INTEGER COMMA RC { result = [ :range, val[1].to_i, '' ] }
    | LC INTEGER COMMA RC QUESTION { result = [ :min, :range, val[1].to_i, '' ] }
    | LC INTEGER COMMA INTEGER RC QUESTION { result = [ :min, :range, val[1].to_i, val[3].to_i ] }
45
-
46
-
47
- ---- inner
48
- require 'fabulator/grammar'
49
-
50
# Resets the tokenizer state for a new input and runs the parser.
#
# @param t [String] the expression text that #next_token reads from
# @param ctx [Object] stored in @context; this method does not use it further
# @return [Object] whatever +do_parse+ returns (do_parse is not defined in
#   this file; presumably it comes from the racc runtime)
def parse(t, ctx)
  @source = t
  @context = ctx

  # Position bookkeeping; @line and @col are read by #on_error, so both must
  # have a value before the first token is requested.
  @curpos = 0
  @line = 0
  @col = 0
  @last_token = nil

  # Parser tracing is opt-in (ruby -d) instead of being forced on for every
  # parse.
  @yydebug = $DEBUG ? true : false

  do_parse
end
62
-
63
# Error hook: turns a parse failure into a Fabulator::Grammar::ParserError.
#
# Only the second argument is used; it is interpolated into the message as
# the text that could not be parsed (presumably the offending token's value
# as passed by racc -- confirm against the racc runtime).
#
# @raise [Fabulator::Grammar::ParserError] always
def on_error(*args)
  raise Fabulator::Grammar::ParserError,
        "unable to parse '#{args[1]}' near line #{@line + 1}, column #{@col}"
end
66
-
67
# Name patterns (constants instead of a class variable so the table is not
# shared mutable state across the inheritance tree).
NCNAME_PATTERN = %r{(?:[a-zA-Z_][-a-zA-Z0-9_.]*)}
QNAME_PATTERN = %r{((?:#{NCNAME_PATTERN}:)?#{NCNAME_PATTERN})}

# Single-character tokens: the character maps to the token symbol used by
# the grammar.
PUNCTUATION_TOKENS = {
  '<' => :LT, '>' => :GT,
  '[' => :LB, ']' => :RB,
  '(' => :LP, ')' => :RP,
  '{' => :LC, '}' => :RC,
  ':' => :COLON, ',' => :COMMA, '|' => :PIPE,
  '*' => :STAR, '+' => :PLUS, '.' => :DOT,
  '?' => :QUESTION, '$' => :DOLLAR, '^' => :CARET
}.freeze

# One unit of literal text: a backslash escape (backslash plus the escaped
# character) or a single character that is neither whitespace nor one of the
# punctuation characters above.
TEXT_UNIT_PATTERN = /\\.|[^\s\$\^\[\]<>\{\}\(\):,|*+.?]/

# The longest run of text units starting at the beginning of the string.
TEXT_RUN_PATTERN = /\A(?:#{TEXT_UNIT_PATTERN})+/

# Characters that start a repetition qualifier.
QUANTIFIER_START_PATTERN = /[*?+\{]/

# Returns the next token of @source, starting at @curpos.
#
# Whitespace and XPath 2.0 style comments -- (: ... :), which may nest -- are
# skipped first; any number of consecutive comments is skipped. @line and
# @col are kept up to date for every character consumed, including those
# inside comments and tokens.
#
# @return [Array] [symbol, text] for a token, or [false, false] at the end
#   of the input
# @raise [Fabulator::Grammar::ParserError] if no token can be formed
def next_token
  @token = nil
  # At the start of the input the column is 0 whether or not the caller
  # initialised it.
  @col = 0 if @col.nil? || @curpos.zero?

  skip_blanks
  while @source[@curpos, 2] == '(:'
    skip_comment
    skip_blanks
  end

  if @curpos >= @source.length
    @last_token = nil
    return [false, false]
  end

  char = @source[@curpos, 1]
  if PUNCTUATION_TOKENS.key?(char)
    @token = [PUNCTUATION_TOKENS[char], char]
    advance_over(1)
  else
    @token = scan_text_token
  end

  @token
end

# Consumes +count+ characters, keeping @curpos, @line and @col in step.
def advance_over(count)
  count.times do
    if @source[@curpos, 1] == "\n"
      @line += 1
      @col = 0
    else
      @col += 1
    end
    @curpos += 1
  end
end

# Consumes a run of whitespace characters.
def skip_blanks
  advance_over(1) while @curpos < @source.length && @source[@curpos, 1] =~ /\s/
end

# Consumes one (possibly nested) comment; @curpos must be at its "(:".
# An unterminated comment runs to the end of the input.
def skip_comment
  advance_over(2)
  depth = 1
  while depth > 0 && @curpos < @source.length
    case @source[@curpos, 2]
    when '(:'
      depth += 1
      advance_over(2)
    when ':)'
      depth -= 1
      advance_over(2)
    else
      advance_over(1)
    end
  end
end

# Reads the longest run of literal text at @curpos and classifies it as
# INTEGER, NCNAME or TEXT. Backslash escapes are replaced by the escaped
# character.
def scan_text_token
  run = TEXT_RUN_PATTERN.match(@source[@curpos..-1])
  if run.nil?
    raise Fabulator::Grammar::ParserError,
          "unable to tokenize input near line #{@line + 1}, column #{@col}"
  end

  units = run[0].scan(TEXT_UNIT_PATTERN)
  quantified = @source[@curpos + run[0].length, 1] =~ QUANTIFIER_START_PATTERN

  # A qualifier binds only to the last unit of the run, so that unit is left
  # for the next call (unless it is the only one).
  units.pop if quantified && units.size > 1

  consumed = units.inject(0) { |sum, unit| sum + unit.length }
  # Only an escape is two characters long; keep just the escaped character.
  text = units.map { |unit| unit.length == 2 ? unit[1, 1] : unit }.join

  kind =
    if quantified
      :TEXT
    elsif text =~ /\A\d+\z/
      :INTEGER
    elsif text =~ /\A#{NCNAME_PATTERN}\z/
      :NCNAME
    else
      :TEXT
    end

  advance_over(consumed)
  [kind, text]
end

private :advance_over, :skip_blanks, :skip_comment, :scan_text_token