citrus 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +86 -0
- data/Rakefile +67 -0
- data/citrus.gemspec +29 -0
- data/examples/calc.citrus +103 -0
- data/examples/calc.rb +95 -0
- data/examples/calc_sugar.rb +94 -0
- data/lib/citrus.rb +904 -0
- data/lib/citrus/debug.rb +37 -0
- data/lib/citrus/peg.rb +375 -0
- data/lib/citrus/sugar.rb +25 -0
- data/test/alias_test.rb +66 -0
- data/test/and_predicate_test.rb +27 -0
- data/test/calc_peg_test.rb +6 -0
- data/test/calc_sugar_test.rb +6 -0
- data/test/calc_test.rb +6 -0
- data/test/choice_test.rb +62 -0
- data/test/expression_test.rb +29 -0
- data/test/fixed_width_test.rb +37 -0
- data/test/grammar_test.rb +129 -0
- data/test/helper.rb +143 -0
- data/test/label_test.rb +26 -0
- data/test/match_test.rb +76 -0
- data/test/not_predicate_test.rb +27 -0
- data/test/peg_test.rb +663 -0
- data/test/repeat_test.rb +93 -0
- data/test/rule_test.rb +49 -0
- data/test/sequence_test.rb +53 -0
- data/test/super_test.rb +66 -0
- metadata +133 -0
data/lib/citrus/debug.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'citrus'
|
2
|
+
require 'builder'
|
3
|
+
|
4
|
+
module Citrus
  class Match
    # Renders this match, and recursively every sub-match, as <match>
    # elements on a Builder::XmlMarkup object and returns that object.
    #
    # +xml+ may be an existing markup object to append to, or a Hash of
    # Builder::XmlMarkup options. In the Hash case a new builder is created
    # (with :indent => 2 unless the Hash overrides it) and +instruct!+ is
    # called on it before any element is written.
    def to_markup(xml={})
      if xml.is_a?(Hash)
        xml = Builder::XmlMarkup.new({ :indent => 2 }.merge(xml))
        xml.instruct!
      end

      attributes = { "name" => name, "text" => text, "offset" => offset }

      if matches.empty?
        # Leaf match: element without children.
        xml.match(attributes)
      else
        # Nested match: children are written inside this element's block.
        xml.match(attributes) do
          matches.each do |child|
            child.to_markup(xml)
          end
        end
      end

      xml
    end

    # Returns the +target!+ of the builder produced by #to_markup. +opt+ is
    # passed straight through to #to_markup.
    def to_xml(opt={})
      markup = to_markup(opt)
      markup.target!
    end

    def inspect # :nodoc:
      to_xml
    end
  end
end
|
data/lib/citrus/peg.rb
ADDED
@@ -0,0 +1,375 @@
|
|
1
|
+
require 'citrus'
|
2
|
+
|
3
|
+
module Citrus
|
4
|
+
# A grammar for Citrus-flavored parsing expression grammars. This module is
|
5
|
+
# used in Citrus#eval to parse and evaluate Citrus PEG's and serves as a prime
|
6
|
+
# example of how to create a complex grammar complete with semantic
|
7
|
+
# interpretation in pure Ruby.
|
8
|
+
module PEG
|
9
|
+
include Grammar
|
10
|
+
|
11
|
+
## Hierarchical syntax
|
12
|
+
|
13
|
+
# Top-level rule: leading :space followed by any number of :require and
# :grammar matches.
rule :file do
  all(:space, zero_or_more(any(:require, :grammar))) do
    # The result of find(:require) on this match.
    def requires
      find(:require)
    end

    # The result of find(:grammar) on this match.
    def grammars
      find(:grammar)
    end

    # Requires the value of every :require match first, then returns an
    # Array holding the value of every :grammar match.
    def value
      requires.each do |req|
        require req.value
      end

      grammars.map do |gram|
        gram.value
      end
    end
  end
end
|
29
|
+
|
30
|
+
# A grammar definition: grammar keyword, module name, body and end keyword.
rule :grammar do
  all(:grammar_keyword, :module_name, :grammar_body, :end_keyword) do
    # The result of find(:include) on this match.
    def includes
      find(:include)
    end

    # The value of each include, evaluated with TOPLEVEL_BINDING.
    def modules
      includes.map do |inc|
        eval(inc.value, TOPLEVEL_BINDING)
      end
    end

    # The last :root match found, or nil when there is none.
    def root
      find(:root).last
    end

    # The result of find(:rule) on this match.
    def rules
      find(:rule)
    end

    # Assigns a new Citrus::Grammar to the named constant (evaluated with
    # TOPLEVEL_BINDING), then applies includes, the root (when present) and
    # every rule to it. Returns the grammar.
    def value
      assignment = '%s = Citrus::Grammar.new' % module_name.value
      grammar = eval(assignment, TOPLEVEL_BINDING)

      modules.each do |mod|
        grammar.include(mod)
      end

      if root
        grammar.root(root.value)
      end

      rules.each do |r|
        grammar.rule(r.rule_name.value, r.value)
      end

      grammar
    end
  end
end
|
58
|
+
|
59
|
+
# Grammar body: zero or more :include, :root or :rule matches in any order.
rule(:grammar_body) {
  zero_or_more(any(:include, :root, :rule))
}
|
62
|
+
|
63
|
+
# A rule definition: rule keyword, rule name, rule body and end keyword.
rule :rule do
  all(:rule_keyword, :rule_name, :rule_body, :end_keyword) do
    # The value of a rule definition is the value of its body.
    def value
      rule_body.value
    end
  end
end
|
70
|
+
|
71
|
+
# A rule body: one :sequence followed by a :choice match that supplies any
# further alternatives.
rule :rule_body do
  all(:sequence, :choice) do
    # The leading sequence followed by the sequences of the choice match.
    # Memoized in @choices.
    def choices
      @choices ||= [ sequence ].concat(choice.sequences)
    end

    # The value of every alternative, in order.
    def values
      choices.map do |alternative|
        alternative.value
      end
    end

    # A Choice over all alternatives when there is more than one, otherwise
    # the value of the single alternative.
    def value
      if choices.length > 1
        Choice.new(values)
      else
        values.first
      end
    end
  end
end
|
86
|
+
|
87
|
+
# Zero or more repetitions of a :bar followed by a :sequence.
rule :choice do
  zero_or_more([ :bar, :sequence ]) do
    # The second sub-match (index 1) of every repetition.
    def sequences
      matches.map do |pair|
        pair.matches[1]
      end
    end
  end
end
|
94
|
+
|
95
|
+
# Zero or more :prefix matches.
rule :sequence do
  zero_or_more(:prefix) do
    # The value of every sub-match, in order.
    def values
      matches.map do |item|
        item.value
      end
    end

    # A Sequence over all values when there is more than one sub-match,
    # otherwise the first value (nil when there are none).
    def value
      if matches.length > 1
        Sequence.new(values)
      else
        values.first
      end
    end
  end
end
|
106
|
+
|
107
|
+
# An optional :qualifier followed by an :appendix.
rule :prefix do
  all(zero_or_one(:qualifier), :appendix) do
    # The appendix value, wrapped by the qualifier when one was matched.
    def value
      inner = appendix.value
      qual = matches[0].first
      qual ? qual.wrap(inner) : inner
    end
  end
end
|
117
|
+
|
118
|
+
# A :suffix followed by an optional :extension.
rule :appendix do
  all(:suffix, zero_or_one(:extension)) do
    # The suffix value, wrapped by the extension when one was matched.
    def value
      inner = suffix.value
      ext = matches[1].first
      ext ? ext.wrap(inner) : inner
    end
  end
end
|
128
|
+
|
129
|
+
# A :primary followed by an optional :quantifier.
rule :suffix do
  all(:primary, zero_or_one(:quantifier)) do
    # The primary value, wrapped by the quantifier when one was matched.
    def value
      inner = primary.value
      quant = matches[1].first
      quant ? quant.wrap(inner) : inner
    end
  end
end
|
139
|
+
|
140
|
+
# One of :super, :alias, :rule_body_paren or :terminal.
rule :primary do
  any(:super, :alias, :rule_body_paren, :terminal) do
    # Delegates to the value of the first sub-match.
    def value
      first.value
    end
  end
end
|
147
|
+
|
148
|
+
# A :rule_body enclosed in :lparen and :rparen.
rule :rule_body_paren do
  all(:lparen, :rule_body, :rparen) do
    # The value of the enclosed rule body.
    def value
      rule_body.value
    end
  end
end
|
155
|
+
|
156
|
+
## Lexical syntax
|
157
|
+
|
158
|
+
# A require statement: :require_keyword followed by a :quoted_string.
rule :require do
  all(:require_keyword, :quoted_string) do
    # The value of the quoted string.
    def value
      quoted_string.value
    end
  end
end
|
165
|
+
|
166
|
+
# An include statement: :include_keyword followed by a :module_name.
rule :include do
  all(:include_keyword, :module_name) do
    # The value of the module name.
    def value
      module_name.value
    end
  end
end
|
173
|
+
|
174
|
+
# A root declaration: :root_keyword followed by a :rule_name.
rule :root do
  all(:root_keyword, :rule_name) do
    # The value of the rule name.
    def value
      rule_name.value
    end
  end
end
|
181
|
+
|
182
|
+
# A rule name: a lowercase letter followed by letters, digits or
# underscores, then :space.
rule :rule_name do
  all(/[a-z][a-zA-Z0-9_]*/, :space) do
    # The text of the first sub-match (the name itself, without the space).
    def value
      first.text
    end
  end
end
|
189
|
+
|
190
|
+
# The literal 'super' followed by :space.
rule :super do
  all('super', :space) do
    # A new Super rule object.
    def value
      Super.new
    end
  end
end
|
197
|
+
|
198
|
+
# A :rule_name that is not preceded by a match of :end_keyword.
rule :alias do
  all(notp(:end_keyword), :rule_name) do
    # A new Alias for the matched rule name.
    def value
      Alias.new(rule_name.value)
    end
  end
end
|
205
|
+
|
206
|
+
# One of :quoted_string, :character_class, :anything_symbol or
# :regular_expression.
rule :terminal do
  any(:quoted_string, :character_class, :anything_symbol, :regular_expression) do
    # Passes the value of the first sub-match to Rule.create.
    def value
      Rule.create(first.value)
    end
  end
end
|
213
|
+
|
214
|
+
# A single- or double-quoted string literal followed by :space.
rule :quoted_string do
  all(/(["'])(?:\\?.)*?\1/, :space) do
    # Evaluates the matched literal text as Ruby source.
    # NOTE(review): this uses eval on the grammar text, so only trusted
    # grammar sources should be loaded.
    def value
      eval(first.text)
    end
  end
end
|
221
|
+
|
222
|
+
# A bracketed character class followed by :space.
rule :character_class do
  all(/\[(?:\\?.)*?\]/, :space) do
    # A Regexp built from the matched bracket expression.
    def value
      Regexp.new(first.text)
    end
  end
end
|
229
|
+
|
230
|
+
# The literal '.' followed by :space.
rule :anything_symbol do
  all('.', :space) do
    # A multiline Regexp, so the dot also matches newlines.
    def value
      /./m
    end
  end
end
|
237
|
+
|
238
|
+
# A slash-delimited regular expression literal with optional trailing flag
# characters, followed by :space.
rule :regular_expression do
  all(/\/(?:\\?.)*?\/[imxouesn]*/, :space) do
    # Evaluates the matched literal text as Ruby source.
    # NOTE(review): this uses eval on the grammar text, so only trusted
    # grammar sources should be loaded.
    def value
      eval(first.text)
    end
  end
end
|
245
|
+
|
246
|
+
# One of :and, :not or :label.
rule :qualifier do
  any(:and, :not, :label) do
    # Delegates wrapping of +rule+ to the first sub-match.
    def wrap(rule)
      first.wrap(rule)
    end
  end
end
|
253
|
+
|
254
|
+
# The literal '&' followed by :space.
rule :and do
  all('&', :space) do
    # Wraps +rule+ in a new AndPredicate.
    def wrap(rule)
      AndPredicate.new(rule)
    end
  end
end
|
261
|
+
|
262
|
+
# The literal '!' followed by :space.
rule :not do
  all('!', :space) do
    # Wraps +rule+ in a new NotPredicate.
    def wrap(rule)
      NotPredicate.new(rule)
    end
  end
end
|
269
|
+
|
270
|
+
# A label: a run of letters, digits or underscores, :space, a ':' and
# :space again.
rule :label do
  all(/[a-zA-Z0-9_]+/, :space, ':', :space) do
    # The label text, i.e. the text of the first sub-match.
    def value
      first.text
    end

    # Wraps +rule+ in a new Label carrying this label's value.
    def wrap(rule)
      Label.new(value, rule)
    end
  end
end
|
281
|
+
|
282
|
+
# Either a :tag or a :block.
rule :extension do
  any(:tag, :block) do
    # Assigns the value of the first sub-match to +rule+'s ext attribute and
    # returns the same +rule+ object.
    def wrap(rule)
      extension_value = first.value
      rule.ext = extension_value
      rule
    end
  end
end
|
290
|
+
|
291
|
+
# A :module_name enclosed in :lt and :gt.
rule :tag do
  all(:lt, :module_name, :gt) do
    # Evaluates the module name with TOPLEVEL_BINDING and returns the result.
    def value
      eval(module_name.value, TOPLEVEL_BINDING)
    end
  end
end
|
298
|
+
|
299
|
+
# A curly-brace block whose contents are nested blocks or runs of
# characters other than '{' and '}'.
rule :block do
  all(:lcurly, zero_or_more(any(:block, /[^{}]+/)), :rcurly) do
    # Evaluates 'Proc.new ' followed by this match's text.
    # NOTE(review): this uses eval on the grammar text, so only trusted
    # grammar sources should be loaded.
    def value
      eval('Proc.new ' + text)
    end
  end
end
|
306
|
+
|
307
|
+
# One of :question, :plus or :repeat.
rule :quantifier do
  any(:question, :plus, :repeat) do
    # Minimum repetition count, taken from the first sub-match.
    def min
      first.min
    end

    # Maximum repetition count, taken from the first sub-match.
    def max
      first.max
    end

    # Wraps +rule+ in a new Repeat with this quantifier's min and max.
    def wrap(rule)
      Repeat.new(min, max, rule)
    end
  end
end
|
317
|
+
|
318
|
+
# The literal '?' followed by :space; min 0, max 1.
rule :question do
  all('?', :space) do
    def min
      0
    end

    def max
      1
    end
  end
end
|
324
|
+
|
325
|
+
# The literal '+' followed by :space; min 1, max Infinity.
rule :plus do
  all('+', :space) do
    def min
      1
    end

    def max
      Infinity
    end
  end
end
|
331
|
+
|
332
|
+
# A '*' with optional digits on either side, followed by :space.
rule :repeat do
  all(/[0-9]*/, '*', /[0-9]*/, :space) do
    # The digits before the '*' as an Integer, or 0 when that sub-match
    # compares equal to the empty string.
    def min
      lower = matches[0]
      if lower == ''
        0
      else
        lower.text.to_i
      end
    end

    # The digits after the '*' as an Integer, or Infinity when that
    # sub-match compares equal to the empty string.
    def max
      upper = matches[2]
      if upper == ''
        Infinity
      else
        upper.text.to_i
      end
    end
  end
end
|
343
|
+
|
344
|
+
# One or more :constant matches, each optionally preceded by '::', followed
# by :space.
rule :module_name do
  all(one_or_more([ zero_or_one('::'), :constant ]), :space) do
    # The text of the first sub-match (the name without the trailing space).
    def value
      first.text
    end
  end
end
|
351
|
+
|
352
|
+
# A constant name: an uppercase letter followed by letters, digits or
# underscores.
rule(:constant) {
  /[A-Z][a-zA-Z0-9_]*/
}
|
355
|
+
|
356
|
+
# Keyword and punctuation rules. Each one is the literal text followed by
# :space. The table is an Array of pairs so the rules are defined in the
# order listed.
[
  [ :require_keyword, 'require' ],
  [ :include_keyword, 'include' ],
  [ :grammar_keyword, 'grammar' ],
  [ :super_keyword,   'super' ],
  [ :root_keyword,    'root' ],
  [ :rule_keyword,    'rule' ],
  [ :end_keyword,     'end' ],
  [ :lparen,          '(' ],
  [ :rparen,          ')' ],
  [ :lcurly,          '{' ],
  [ :rcurly,          '}' ],
  [ :bar,             '|' ],
  [ :lt,              '<' ],
  [ :gt,              '>' ]
].each do |rule_name, literal|
  rule rule_name, [ literal, :space ]
end

# A single whitespace character, a '#' comment running to the end of the
# line, and :space as any number of either.
rule(:white, /[ \t\n\r]/)
rule(:comment, /#.*/)
rule(:space, zero_or_more(any(:white, :comment)))
|
374
|
+
end
|
375
|
+
end
|
data/lib/citrus/sugar.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'citrus'
|
2
|
+
|
3
|
+
module Citrus
  module GrammarMethods
    # Syntactic sugar for Ruby grammars: a bare rule name may be written
    # where a Symbol would otherwise be required, so that
    #
    #   rule :value do
    #     any(:alpha, :num)
    #   end
    #
    # may instead be written as
    #
    #   rule value do
    #     any(alpha, num)
    #   end
    #
    # This works by answering every otherwise-undefined method call with the
    # name of that method as a Symbol; any arguments are ignored. Because it
    # relies on +method_missing+, rules that share a name with a method that
    # really exists in GrammarMethods (root, rule, rules, etc.) must still
    # be referenced with a Symbol.
    def method_missing(name, *_ignored)
      name
    end
  end
end
|
data/test/alias_test.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/helper'
|
2
|
+
Citrus.load(File.dirname(__FILE__) + '/_files/alias')
|
3
|
+
|
4
|
+
# Tests for Alias rules: construction, matching through an alias, extension
# of an alias, aliases declared in a PEG file and aliases to included rules.
class AliasTest < Test::Unit::TestCase

  # An Alias is never a terminal rule.
  def test_terminal?
    rule = Alias.new
    assert_equal(false, rule.terminal?)
  end

  # Rule :a is an alias for :b; parsing 'b' must succeed through the alias.
  def test_match
    grammar = Grammar.new {
      rule :a, :b
      rule :b, 'b'
    }

    match = grammar.parse('b')
    assert(match)
    # assert_equal (not assert) so the expected values are actually compared;
    # assert('b', ...) only checks that 'b' is truthy and can never fail.
    assert_equal('b', match.text)
    assert_equal(1, match.length)
  end

  # An extended alias exposes the extension's methods, and the match is not
  # reachable under the aliased rule's name.
  def test_match_renamed
    grammar = Grammar.new {
      rule :a, ext(:b) {
        def value
          'a' + text
        end
      }
      rule :b, 'b'
    }

    match = grammar.parse('b')
    assert(match)
    # assert_equal so the computed value is really checked.
    assert_equal('ab', match.value)

    assert_raise RuntimeError do
      match.b
    end
  end

  # AliasOne is expected to come from the file loaded via Citrus.load in
  # this test file's preamble.
  def test_peg
    match = AliasOne.parse('a')
    assert(match)
  end

  # An alias may refer to a rule that lives in an included grammar.
  def test_included
    grammar1 = Grammar.new {
      rule :a, 'a'
    }

    grammar2 = Grammar.new {
      include grammar1
      rule :b, :a
    }

    match = grammar2.parse('a')
    assert(match)
  end

  # The string form of an Alias is the name of the rule it refers to.
  def test_to_s
    rule = Alias.new(:alpha)
    assert_equal('alpha', rule.to_s)
  end

end
|