fabulator-grammar 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +22 -0
- data/Rakefile +3 -1
- data/VERSION +1 -1
- data/features/grammar.feature +116 -12
- data/features/step_definitions/expression_steps.rb +2 -2
- data/features/step_definitions/grammar_steps.rb +46 -2
- data/features/step_definitions/xml_steps.rb +5 -16
- data/features/support/env.rb +1 -0
- data/lib/fabulator-grammar.rb +1 -0
- data/lib/fabulator/grammar.rb +12 -3
- data/lib/fabulator/grammar/actions.rb +17 -7
- data/lib/fabulator/grammar/actions/context.rb +18 -0
- data/lib/fabulator/grammar/actions/grammar.rb +76 -0
- data/lib/fabulator/grammar/actions/rule.rb +51 -0
- data/lib/fabulator/grammar/actions/token.rb +27 -0
- data/lib/fabulator/grammar/actions/when.rb +35 -0
- data/lib/fabulator/grammar/cursor.rb +118 -0
- data/lib/fabulator/grammar/expr/anchor.rb +28 -0
- data/lib/fabulator/grammar/expr/char_set.rb +67 -18
- data/lib/fabulator/grammar/expr/look_ahead.rb +44 -0
- data/lib/fabulator/grammar/expr/rule.rb +33 -28
- data/lib/fabulator/grammar/expr/rule_alternative.rb +45 -0
- data/lib/fabulator/grammar/expr/rule_mode.rb +16 -0
- data/lib/fabulator/grammar/expr/rule_ref.rb +15 -4
- data/lib/fabulator/grammar/expr/rule_sequence.rb +59 -0
- data/lib/fabulator/grammar/expr/sequence.rb +7 -1
- data/lib/fabulator/grammar/expr/set_skip.rb +16 -0
- data/lib/fabulator/grammar/expr/text.rb +8 -0
- data/lib/fabulator/grammar/expr/{rules.rb → token.rb} +12 -1
- data/lib/fabulator/grammar/expr/token_alternative.rb +42 -0
- data/lib/fabulator/grammar/rule_parser.rb +667 -0
- data/lib/fabulator/grammar/token_parser.rb +733 -0
- data/rules.racc +249 -0
- data/tokens.racc +257 -0
- metadata +29 -12
- data/lib/fabulator/grammar/parser.rb +0 -548
- data/regex.racc +0 -183
data/regex.racc
DELETED
@@ -1,183 +0,0 @@
|
|
1
|
-
# Racc grammar for the regex-like rule language.
#
# The semantic actions build Fabulator::Grammar::Expr objects; `val` holds the
# values of the matched right-hand-side symbols and `result` is the value of
# the production.
class Fabulator::Grammar::Parser

  start rules

rule
  # One or more alternatives separated by '|'.
  rules: anchored_rule { result = Fabulator::Grammar::Expr::Rules.new; result.add_alternative(val[0]) }
    | rules PIPE anchored_rule { result = val[0]; result.add_alternative(val[2]) }

  # A rule optionally anchored at its start ('^'), its end ('$'), or both.
  anchored_rule: rule { result = val[0] }
    | CARET rule { result = val[1]; result.anchor_start }
    | rule DOLLAR { result = val[0]; result.anchor_end }
    | CARET rule DOLLAR { result = val[1]; result.anchor_start; result.anchor_end }

  # A (possibly empty) list of sequences; the empty alternative creates the Rule.
  rule: { result = Fabulator::Grammar::Expr::Rule.new; }
    | rule sequence { result = val[0]; result.add_sequence(val[1]); }

  # A sub-sequence with optional repetition qualifiers.
  sequence: sub_sequence sequence_qualifiers { result = Fabulator::Grammar::Expr::Sequence.new(val[0], val[1]) }
    | sub_sequence { result = Fabulator::Grammar::Expr::Sequence.new(val[0]) }

  sub_sequence: LT qname GT { result = Fabulator::Grammar::Expr::RuleRef.new(val[1]) }
    | text { result = Fabulator::Grammar::Expr::Text.new(val[0]) }
    | DOT { result = Fabulator::Grammar::Expr::Any.new }
    | LP rules RP { result = val[1] }
    | LB text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[1]) }
    | LB CARET text RB { result = Fabulator::Grammar::Expr::CharSet.new(val[2]); result.inverted }

  text: qname { result = val[0] }
    | TEXT { result = val[0] }
    | INTEGER { result = val[0] }

  qname: NCNAME { result = val[0] }
    | NCNAME COLON NCNAME { result = val[0] + ':' + val[2] }

  # Repetition qualifiers. A trailing '?' adds the :min marker.
  # Numeric bounds are always converted with to_i so that every alternative
  # hands the same type to Sequence; the open upper bound stays ''.
  # NOTE(review): the ranged ':min' forms put :min first while the others put
  # it last -- left untouched because the consumer is not visible here.
  sequence_qualifiers: STAR { result = [ :zero_or_more ] }
    | STAR QUESTION { result = [ :zero_or_more, :min ] }
    | PLUS { result = [ :one_or_more ] }
    | PLUS QUESTION { result = [ :one_or_more, :min ] }
    | QUESTION { result = [ :zero_or_one ] }
    | QUESTION QUESTION { result = [ :zero_or_one, :min ] }
    | LC INTEGER RC { result = [ :exact, val[1].to_i ] }
    | LC INTEGER COMMA INTEGER RC { result = [ :range, val[1].to_i, val[3].to_i ] }
    | LC INTEGER COMMA RC { result = [ :range, val[1].to_i, '' ] }
    | LC INTEGER COMMA RC QUESTION { result = [ :min, :range, val[1].to_i, '' ] }
    | LC INTEGER COMMA INTEGER RC QUESTION { result = [ :min, :range, val[1].to_i, val[3].to_i ] }
|
45
|
-
|
46
|
-
|
47
|
-
---- inner
|
48
|
-
require 'fabulator/grammar'
|
49
|
-
|
50
|
-
# Resets the tokenizer state for a new input and runs the parser.
#
# @param t [String] the source text to tokenize and parse
# @param ctx [Object] stored in @context; not otherwise used in this method
# @return whatever do_parse returns (do_parse is defined outside this section,
#   presumably by the racc runtime -- confirm)
def parse(t, ctx)
  @source, @context = t, ctx

  # Cursor position and zero-based line counter both start at the beginning.
  @curpos = @line = 0
  @last_token = nil

  # NOTE(review): debug flag is switched on unconditionally.
  @yydebug = true

  do_parse
end
|
62
|
-
|
63
|
-
# Error hook: turns a parse failure into a Fabulator::Grammar::ParserError.
#
# The message quotes the second argument and reports the one-based line
# (@line is zero-based) together with the current @col value.
#
# @param args [Array] only args[1] is used
# @raise [Fabulator::Grammar::ParserError] always
def on_error(*args)
  message = "unable to parse '#{args[1]}' near line #{@line + 1}, column #{@col}"
  raise Fabulator::Grammar::ParserError, message
end
|
66
|
-
|
67
|
-
# Shared patterns used by the tokenizer.
#   :ncname -- a name starting with a letter or underscore
#   :qname  -- an optionally prefixed name (prefix:local), captured as a whole
@@regex = {}
@@regex[:ncname] = %r{(?:[a-zA-Z_][-a-zA-Z0-9_.]*)}
@@regex[:qname] = %r{((?:#{@@regex[:ncname]}:)?#{@@regex[:ncname]})}
|
72
|
-
|
73
|
-
# Single-character operators and the token symbol reported for each.
SINGLE_CHAR_TOKENS = {
  '<' => :LT,    '>' => :GT,
  '[' => :LB,    ']' => :RB,
  '(' => :LP,    ')' => :RP,
  '{' => :LC,    '}' => :RC,
  ':' => :COLON, ',' => :COMMA,
  '|' => :PIPE,  '*' => :STAR,
  '+' => :PLUS,  '.' => :DOT,
  '?' => :QUESTION,
  '$' => :DOLLAR,
  '^' => :CARET
}.freeze

# One unit of literal text at the cursor: either a backslash escape (group 1
# is the escaped character) or a single character that is neither a space nor
# one of the operator characters (group 2).
TEXT_UNIT = /\G(?:\\(.)|([^ \$\^\[\]<>\{\}\(\):,|*+.?]))/

# Returns the next token of @source starting at @curpos.
#
# Leading whitespace and any number of consecutive "(: ... :)" comments
# (which may nest) are skipped first. Operators are reported as
# [SYMBOL, char]; a run of literal text is reported as [:INTEGER, text],
# [:NCNAME, text] or [:TEXT, text] with backslash escapes resolved.
#
# @return [Array] [symbol, String] for a token, or [false, false] at the end
#   of the input
# @return [nil] if nothing could be recognised (a diagnostic is printed)
def next_token
  @token = nil
  # NOTE(review): @col restarts at 0 on every call, so it only counts the
  # whitespace/comment characters skipped since the previous token.
  @col = 0

  skip_whitespace
  skip_whitespace while skip_comment

  if @curpos >= @source.length
    @last_token = nil
    return [ false, false ]
  end

  char = @source[@curpos, 1]
  if SINGLE_CHAR_TOKENS.key?(char)
    @token = [ SINGLE_CHAR_TOKENS[char], char ]
    @curpos += 1
  else
    @token = scan_text_token
  end

  if @token.nil?
    puts "Uh oh... we don't know what to do: #{@source[@curpos .. @source.length-1]}"
  end

  @token
end

# Advances @curpos past whitespace, bumping @line (and resetting @col) on
# every newline and bumping @col for any other whitespace character.
def skip_whitespace
  while @curpos < @source.length && @source[@curpos, 1] =~ /\s/
    if @source[@curpos, 1] == "\n"
      @line += 1
      @col = 0
    else
      @col += 1
    end
    @curpos += 1
  end
end

# Skips one "(: ... :)" comment starting at @curpos, honouring nesting.
# An unterminated comment consumes the rest of the input.
#
# @return [Boolean] true when a comment was consumed
def skip_comment
  return false unless @curpos < @source.length && @source[@curpos, 2] == '(:'

  depth = 1
  @curpos += 2
  @col += 2
  while depth > 0 && @curpos < @source.length
    if @source[@curpos, 2] == '(:'
      depth += 1
      @curpos += 1
      @col += 1
    end
    if @source[@curpos, 2] == ':)'
      depth -= 1
      @curpos += 1
      @col += 1
    end
    @curpos += 1
    @col += 1
  end
  true
end

# Scans the longest run of literal text at @curpos and advances past it.
#
# When the run is directly followed by a quantifier (* ? + {), the last unit
# is left for the next call so that the quantifier applies to it alone; such
# text is always reported as :TEXT. Escapes are tracked per unit, so an
# escaped operator stays literal even when it is the unit split off, and the
# cursor always advances by the number of source characters consumed.
#
# @return [Array, nil] the token, or nil if no literal text starts at @curpos
def scan_text_token
  units = []
  pos = @curpos
  while (m = @source.match(TEXT_UNIT, pos))
    units << [ m[0].length, m[1] || m[2] ]
    pos += m[0].length
  end
  return nil if units.empty?

  before_quantifier = @source[pos, 1] =~ /[*?+\{]/
  units.pop if before_quantifier && units.size > 1

  text = units.map { |unit| unit.last }.join
  @curpos += units.inject(0) { |total, unit| total + unit.first }

  return [ :TEXT, text ] if before_quantifier

  case text
  when /\A\d+\z/ then [ :INTEGER, text ]
  when /\A#{@@regex[:ncname]}\z/ then [ :NCNAME, text ]
  else [ :TEXT, text ]
  end
end
|