fabulator-grammar 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +22 -0
- data/Rakefile +3 -1
- data/VERSION +1 -1
- data/features/grammar.feature +116 -12
- data/features/step_definitions/expression_steps.rb +2 -2
- data/features/step_definitions/grammar_steps.rb +46 -2
- data/features/step_definitions/xml_steps.rb +5 -16
- data/features/support/env.rb +1 -0
- data/lib/fabulator-grammar.rb +1 -0
- data/lib/fabulator/grammar.rb +12 -3
- data/lib/fabulator/grammar/actions.rb +17 -7
- data/lib/fabulator/grammar/actions/context.rb +18 -0
- data/lib/fabulator/grammar/actions/grammar.rb +76 -0
- data/lib/fabulator/grammar/actions/rule.rb +51 -0
- data/lib/fabulator/grammar/actions/token.rb +27 -0
- data/lib/fabulator/grammar/actions/when.rb +35 -0
- data/lib/fabulator/grammar/cursor.rb +118 -0
- data/lib/fabulator/grammar/expr/anchor.rb +28 -0
- data/lib/fabulator/grammar/expr/char_set.rb +67 -18
- data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
- data/lib/fabulator/grammar/expr/rule.rb +33 -28
- data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
- data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
- data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
- data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
- data/lib/fabulator/grammar/expr/sequence.rb +7 -1
- data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
- data/lib/fabulator/grammar/expr/text.rb +8 -0
- data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
- data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
- data/lib/fabulator/grammar/rule_parser.rb +667 -0
- data/lib/fabulator/grammar/token_parser.rb +733 -0
- data/rules.racc +249 -0
- data/tokens.racc +257 -0
- metadata +29 -12
- data/lib/fabulator/grammar/parser.rb +0 -548
- data/regex.racc +0 -183
data/regex.racc
DELETED
@@ -1,183 +0,0 @@
|
|
1
|
-
# Racc grammar for the regex-like rule expressions of fabulator-grammar.
# Terminals (LT, GT, TEXT, INTEGER, NCNAME, ...) are produced by the
# next_token method defined in the "inner" section of this file.
class Fabulator::Grammar::Parser

  start rules

rule
  # One or more alternatives separated by "|", collected into an Expr::Rules.
  rules: anchored_rule { result = Fabulator::Grammar::Expr::Rules.new; result.add_alternative(val[0]) }
    | rules PIPE anchored_rule { result = val[0]; result.add_alternative(val[2]) }

  # A rule optionally anchored with a leading "^" and/or a trailing "$".
  anchored_rule: rule { result = val[0] }
    | CARET rule { result = val[1]; result.anchor_start }
    | rule DOLLAR { result = val[0]; result.anchor_end }
    | CARET rule DOLLAR { result = val[1]; result.anchor_start; result.anchor_end }

  # A possibly empty run of sequences appended to a single Expr::Rule.
  rule: { result = Fabulator::Grammar::Expr::Rule.new; }
    | rule sequence { result = val[0]; result.add_sequence(val[1]); }

  # One atom, optionally followed by a repetition qualifier.
  sequence: sub_sequence sequence_qualifiers { result = Fabulator::Grammar::Expr::Sequence.new(val[0], val[1]) }
    | sub_sequence { result = Fabulator::Grammar::Expr::Sequence.new(val[0]) }

  # Atoms: <rule-reference>, literal text, ".", a parenthesised group, or a
  # (possibly negated) bracketed character set.
  sub_sequence: LT qname GT { result = Fabulator::Grammar::Expr::RuleRef.new(val[1]) }
    | text { result = Fabulator::Grammar::Expr::Text.new(val[0]) }
    | DOT { result = Fabulator::Grammar::Expr::Any.new }
    | LP rules RP { result = val[1] }
    | LB text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[1]) }
    | LB CARET text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[2]); result.inverted }

  # Literal text keeps the token's string value, whatever its token type.
  text: qname { result = val[0] }
    | TEXT { result = val[0] }
    | INTEGER { result = val[0] }

  # A name with an optional "prefix:" part, returned as a single string.
  qname: NCNAME { result = val[0] }
    | NCNAME COLON NCNAME { result = val[0] + ':' + val[2] }

  # Repetition qualifiers, returned as an array of tags and counts.  A
  # trailing "?" adds the :min tag (presumably "match as little as possible";
  # confirm against Expr::Sequence).  Counts are always converted to
  # integers; the open-ended forms "{n,}" used to pass the lower bound
  # through as a string, unlike every other form.  The missing upper bound
  # is still reported as ''.
  sequence_qualifiers: STAR { result = [ :zero_or_more ] }
    | STAR QUESTION { result = [ :zero_or_more, :min ] }
    | PLUS { result = [ :one_or_more ] }
    | PLUS QUESTION { result = [ :one_or_more, :min ] }
    | QUESTION { result = [ :zero_or_one ] }
    | QUESTION QUESTION { result = [ :zero_or_one, :min ] }
    | LC INTEGER RC { result = [ :exact, val[1].to_i ] }
    | LC INTEGER COMMA INTEGER RC { result = [ :range, val[1].to_i, val[3].to_i ] }
    | LC INTEGER COMMA RC { result = [ :range, val[1].to_i, '' ] }
    | LC INTEGER COMMA RC QUESTION { result = [ :min, :range, val[1].to_i, '' ] }
    | LC INTEGER COMMA INTEGER RC QUESTION { result = [ :min, :range, val[1].to_i, val[3].to_i ] }
|
45
|
-
|
46
|
-
|
47
|
-
---- inner
|
48
|
-
require 'fabulator/grammar'
|
49
|
-
|
50
|
-
# Resets the tokenizer state and runs the parser over +t+.
#
# t   - the source text to parse; read by next_token via @source.
# ctx - stored in @context; not used by the code in this file.
#
# Returns whatever do_parse returns (do_parse is not defined in this file;
# presumably it is Racc::Parser#do_parse).
def parse(t, ctx)
  @source = t
  @curpos = 0
  @context = ctx
  @line = 0
  # on_error interpolates @col, so give it a defined value from the start.
  @col = 0
  @last_token = nil

  # @yydebug is deliberately not forced to true here: that was a debugging
  # leftover that switched on parser tracing for every caller.
  do_parse
end
|
62
|
-
|
63
|
-
# Error hook: turns a parse failure into a Fabulator::Grammar::ParserError.
#
# args - the hook's arguments; only the second one (args[1]) is used, and it
#        is quoted in the message as the text that could not be parsed.
#
# Raises Fabulator::Grammar::ParserError with the offending value and the
# current @line (reported 1-based) and @col.
def on_error(*args)
  offending = args[1]
  message = "unable to parse '#{offending}' near line #{@line + 1}, column #{@col}"
  raise Fabulator::Grammar::ParserError, message
end
|
66
|
-
|
67
|
-
# Pattern for a name without a namespace prefix.
NCNAME_PATTERN = %r{(?:[a-zA-Z_][-a-zA-Z0-9_.]*)}

# Pattern for a name with an optional "prefix:" in front.
QNAME_PATTERN = %r{((?:#{NCNAME_PATTERN}:)?#{NCNAME_PATTERN})}

# Text that consists of exactly one NCName / only of digits.
NCNAME_TOKEN_PATTERN = /\A#{NCNAME_PATTERN}\z/
INTEGER_TOKEN_PATTERN = /\A\d+\z/

# Single-character tokens and the token type reported for each of them.
PUNCTUATION_TOKENS = {
  '<' => :LT,    '>' => :GT,
  '[' => :LB,    ']' => :RB,
  '(' => :LP,    ')' => :RP,
  '{' => :LC,    '}' => :RC,
  ':' => :COLON, ',' => :COMMA,
  '|' => :PIPE,  '*' => :STAR,
  '+' => :PLUS,  '.' => :DOT,
  '?' => :QUESTION,
  '$' => :DOLLAR,
  '^' => :CARET
}.freeze

# Longest run of literal text: backslash-escaped characters, or characters
# that are neither a space nor one of the punctuation characters above.
TEXT_RUN_PATTERN = /\A(?:\\.|[^ \$\^\[\]<>\{\}\(\):,|*+.?])+/

# One unit of a text run: an escaped pair, a plain character, or a lone
# backslash that has nothing to escape.
TEXT_UNIT_PATTERN = /\\.|[^\\]|\\/

# Characters that start a repetition qualifier.
QUANTIFIER_START = /[*?+\{]/

# Returns the next token as [type, text], or [false, false] at end of input.
#
# Whitespace and (possibly nested) "(: ... :)" comments are skipped first;
# any number of comments may follow each other.  Punctuation yields the
# token types listed in PUNCTUATION_TOKENS; everything else is read as a
# run of literal text and reported as INTEGER, NCNAME or TEXT.
#
# @col is reset on every call, so it only counts the characters skipped
# before the token, not the true column in the source line.
def next_token
  @token = nil
  @col = 0

  skip_whitespace
  while @source[@curpos, 2] == '(:'
    skip_comment
    skip_whitespace
  end

  if @curpos >= @source.length
    @last_token = nil
    return [ false, false ]
  end

  char = @source[@curpos, 1]
  if PUNCTUATION_TOKENS.key?(char)
    @token = [ PUNCTUATION_TOKENS[char], char ]
    @curpos += 1
  else
    # Any character that is neither whitespace nor punctuation starts a text
    # run, so a token is always produced here.
    @token, consumed = scan_text
    @curpos += consumed
  end

  @token
end

# Advances @curpos past whitespace, counting newlines in @line.
def skip_whitespace
  while @curpos < @source.length && @source[@curpos, 1] =~ /\s/
    if @source[@curpos, 1] == "\n"
      @line += 1
      @col = 0
    else
      @col += 1
    end
    @curpos += 1
  end
end

# Advances @curpos past the "(: ... :)" comment starting at @curpos.
# Comments nest; an unterminated comment runs to the end of the input.
def skip_comment
  depth = 1
  @curpos += 2
  @col += 2
  while depth > 0 && @curpos < @source.length
    if @source[@curpos, 2] == '(:'
      depth += 1
      @curpos += 1
      @col += 1
    end
    if @source[@curpos, 2] == ':)'
      depth -= 1
      @curpos += 1
      @col += 1
    end
    @curpos += 1
    @col += 1
  end
end

# Reads the text run at @curpos without moving @curpos.
#
# Returns [[type, text], consumed] where text has its escaping backslashes
# removed and consumed is the number of source characters the token covers.
#
# When the run is directly followed by a qualifier ("*", "?", "+" or "{"),
# the qualifier must apply to the last character only, so that character is
# left for the next call and the token is always reported as TEXT.
def scan_text
  raw = @source[@curpos..-1][TEXT_RUN_PATTERN]
  units = raw.scan(TEXT_UNIT_PATTERN)

  quantified = @source[@curpos + raw.length, 1] =~ QUANTIFIER_START
  units = units[0..-2] if quantified && units.size > 1

  # An escaped pair stands for its second character; "\\" is a backslash.
  text = units.map { |unit| unit.length > 1 ? unit[1, 1] : unit }.join

  kind = case
         when quantified then :TEXT
         when text =~ INTEGER_TOKEN_PATTERN then :INTEGER
         when text =~ NCNAME_TOKEN_PATTERN then :NCNAME
         else :TEXT
         end

  [ [ kind, text ], units.join.length ]
end

private :skip_whitespace, :skip_comment, :scan_text
|