peg 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/peg.rb +0 -445
- metadata +9 -8
data/lib/peg.rb
CHANGED
@@ -1,445 +0,0 @@
|
|
1
|
-
module PEG
|
2
|
-
class ValueObject
|
3
|
-
def ==(other)
|
4
|
-
inspect == other.inspect
|
5
|
-
end
|
6
|
-
end
|
7
|
-
|
8
|
-
class Node < ValueObject
|
9
|
-
attr_accessor :text, :children, :name
|
10
|
-
|
11
|
-
def initialize(text, children=[], name=nil)
|
12
|
-
@text, @children, @name = text, children, name
|
13
|
-
end
|
14
|
-
|
15
|
-
def inspect
|
16
|
-
"#{self.class}.new(#{text.inspect}, #{children.inspect}, #{name.inspect})"
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
class Rule < ValueObject
|
21
|
-
attr_accessor :children
|
22
|
-
|
23
|
-
def initialize(*children)
|
24
|
-
@children = children
|
25
|
-
end
|
26
|
-
|
27
|
-
def name(value=nil)
|
28
|
-
if value
|
29
|
-
@name = value
|
30
|
-
self
|
31
|
-
else
|
32
|
-
@name
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def parse(source)
|
37
|
-
node = match(source)
|
38
|
-
if node.text.length != source.length
|
39
|
-
raise SyntaxError.new source[node.text.length, 50].inspect
|
40
|
-
else
|
41
|
-
node
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
def result(text, children=[])
|
46
|
-
Node.new(text, children, @name)
|
47
|
-
end
|
48
|
-
|
49
|
-
def inspect
|
50
|
-
repr = "#{self.class}.new(#{_inspect})"
|
51
|
-
@name ? repr + ".name(#{@name.inspect})" : repr
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
class Literal < Rule
|
56
|
-
def initialize(literal)
|
57
|
-
@literal = literal
|
58
|
-
@children = []
|
59
|
-
end
|
60
|
-
|
61
|
-
def match(text)
|
62
|
-
text.start_with?(@literal) ? result(@literal) : nil
|
63
|
-
end
|
64
|
-
|
65
|
-
def _inspect
|
66
|
-
@literal.inspect
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
class Regex < Literal
|
71
|
-
def match(text)
|
72
|
-
res = Regexp.new('\A' + @literal).match(text)
|
73
|
-
res && result(res.to_s)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
class Sequence < Rule
|
78
|
-
def match(text)
|
79
|
-
text_ = String.new(text)
|
80
|
-
len = 0
|
81
|
-
children = []
|
82
|
-
@children.each do |child|
|
83
|
-
node = child.match(text_)
|
84
|
-
if node == nil
|
85
|
-
return nil
|
86
|
-
else
|
87
|
-
children << node
|
88
|
-
text_ = text_.slice node.text.length..text_.length
|
89
|
-
len += node.text.length
|
90
|
-
end
|
91
|
-
end
|
92
|
-
result(text.slice(0...len), children)
|
93
|
-
end
|
94
|
-
|
95
|
-
def _inspect
|
96
|
-
@children.map(&:inspect).join(', ')
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
class Or < Sequence
|
101
|
-
def match(text)
|
102
|
-
@children.each do |child|
|
103
|
-
node = child.match(text)
|
104
|
-
return result(node.text, [node]) if node
|
105
|
-
end
|
106
|
-
nil
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
class Not < Sequence
|
111
|
-
def match(text)
|
112
|
-
@children[0].match(text) ? nil : result('')
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
class And < Sequence
|
117
|
-
def match(text)
|
118
|
-
@children[0].match(text) ? result('') : nil
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
class OneOrMore < Sequence
|
123
|
-
@range = (1..Float::INFINITY)
|
124
|
-
|
125
|
-
class << self
|
126
|
-
attr_accessor :range
|
127
|
-
end
|
128
|
-
|
129
|
-
def match(text)
|
130
|
-
text_ = String.new(text)
|
131
|
-
len = 0
|
132
|
-
children = []
|
133
|
-
loop do
|
134
|
-
node = @children[0].match(text_)
|
135
|
-
break if not node
|
136
|
-
children << node
|
137
|
-
break if node.text == ''
|
138
|
-
text_ = text_.slice node.text.length..text_.length
|
139
|
-
len += node.text.length
|
140
|
-
end
|
141
|
-
in_range = self.class.range.include?(children.length)
|
142
|
-
in_range ? result(text.slice(0...len), children) : nil
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
class ZeroOrMore < OneOrMore
|
147
|
-
@range = (0..Float::INFINITY)
|
148
|
-
end
|
149
|
-
|
150
|
-
class Optional < OneOrMore
|
151
|
-
@range = (0..1)
|
152
|
-
end
|
153
|
-
|
154
|
-
class Reference < Rule
|
155
|
-
attr_reader :reference
|
156
|
-
|
157
|
-
def initialize(name)
|
158
|
-
@reference = name
|
159
|
-
@children = []
|
160
|
-
end
|
161
|
-
|
162
|
-
def _inspect
|
163
|
-
@reference.inspect
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
|
-
class Visitor
|
168
|
-
def self.visit(node)
|
169
|
-
return node if node.name == nil
|
170
|
-
send(node.name, node, node.children.map {|c| visit(c)})
|
171
|
-
end
|
172
|
-
end
|
173
|
-
|
174
|
-
class GrammarGenerator < Visitor
|
175
|
-
def self.identifier__regex(node, children)
|
176
|
-
node.text
|
177
|
-
end
|
178
|
-
|
179
|
-
def self.identifier(node, children)
|
180
|
-
identifier_regex, spacing = children
|
181
|
-
Reference.new(identifier_regex)
|
182
|
-
end
|
183
|
-
|
184
|
-
def self.literal(node, children)
|
185
|
-
Literal.new(Kernel.eval(node.text))
|
186
|
-
end
|
187
|
-
|
188
|
-
def self.dot(node, children)
|
189
|
-
Regex.new('.')
|
190
|
-
end
|
191
|
-
|
192
|
-
def self.class(node, children)
|
193
|
-
class_, spacing = children
|
194
|
-
Regex.new(class_.text)
|
195
|
-
end
|
196
|
-
|
197
|
-
def self.definition(node, children)
|
198
|
-
identifier, left_arrow, expression = children
|
199
|
-
expression.name(identifier.reference)
|
200
|
-
end
|
201
|
-
|
202
|
-
def self.expression(node, children)
|
203
|
-
sequence, rest = children
|
204
|
-
rest.length == 0 ? sequence : Or.new(sequence, *rest)
|
205
|
-
end
|
206
|
-
|
207
|
-
def self.expression__zeroormore(node, children)
|
208
|
-
children
|
209
|
-
end
|
210
|
-
|
211
|
-
def self.expression__sequence(node, children)
|
212
|
-
slash, sequence = children
|
213
|
-
sequence
|
214
|
-
end
|
215
|
-
|
216
|
-
def self.grammar(node, children)
|
217
|
-
spacing, definitions = children
|
218
|
-
definitions
|
219
|
-
end
|
220
|
-
|
221
|
-
def self.grammar__oneormore(node, children)
|
222
|
-
children
|
223
|
-
end
|
224
|
-
|
225
|
-
def self.primary(node, children)
|
226
|
-
children[0]
|
227
|
-
end
|
228
|
-
|
229
|
-
def self.primary__sequence(node, children)
|
230
|
-
identifier, not_left_arrow = children
|
231
|
-
identifier
|
232
|
-
end
|
233
|
-
|
234
|
-
def self.primary__parens(node, children)
|
235
|
-
open, expression, close = children
|
236
|
-
expression
|
237
|
-
end
|
238
|
-
|
239
|
-
def self.prefix__optional(node, children)
|
240
|
-
node.text.strip # HACK
|
241
|
-
end
|
242
|
-
|
243
|
-
def self.prefix(node, children)
|
244
|
-
prefix, suffix = children
|
245
|
-
prefix == '' ? suffix : {'&' => And, '!' => Not}.fetch(prefix).new(suffix)
|
246
|
-
end
|
247
|
-
|
248
|
-
def self.sequence(node, children)
|
249
|
-
children.length == 1 ? children[0] : Sequence.new(*children)
|
250
|
-
end
|
251
|
-
|
252
|
-
def self.suffix__optional(node, children)
|
253
|
-
node.text.strip # HACK
|
254
|
-
end
|
255
|
-
|
256
|
-
def self.suffix(node, children)
|
257
|
-
primary, optional_suffix = children
|
258
|
-
optional_suffix == '' ? primary : {
|
259
|
-
'?' => Optional,
|
260
|
-
'*' => ZeroOrMore,
|
261
|
-
'+' => OneOrMore,
|
262
|
-
}.fetch(optional_suffix).new(primary)
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
266
|
-
class Grammar < Sequence
|
267
|
-
def initialize(source)
|
268
|
-
@_nodes = peg_grammar.parse(source)
|
269
|
-
@children = [ReferenceResolver.new(grammar).resolve]
|
270
|
-
end
|
271
|
-
|
272
|
-
def match(source)
|
273
|
-
@children[0].match(source)
|
274
|
-
end
|
275
|
-
|
276
|
-
def grammar
|
277
|
-
GrammarGenerator.visit(@_nodes)
|
278
|
-
end
|
279
|
-
|
280
|
-
def peg_grammar
|
281
|
-
end_of_line = Or.new(
|
282
|
-
Literal.new("\r\n"),
|
283
|
-
Literal.new("\n"),
|
284
|
-
Literal.new("\r"),
|
285
|
-
)
|
286
|
-
space = Or.new(Literal.new(" "), Literal.new("\t"), end_of_line)
|
287
|
-
comment = Sequence.new(
|
288
|
-
Literal.new('#'),
|
289
|
-
ZeroOrMore.new(
|
290
|
-
Sequence.new(Not.new(end_of_line), Regex.new('.')),
|
291
|
-
),
|
292
|
-
end_of_line,
|
293
|
-
)
|
294
|
-
spacing = ZeroOrMore.new(Or.new(space, comment))
|
295
|
-
|
296
|
-
and_ = Sequence.new(Literal.new('&'), spacing)
|
297
|
-
not_ = Sequence.new(Literal.new('!'), spacing)
|
298
|
-
slash = Sequence.new(Literal.new('/'), spacing)
|
299
|
-
left_arrow = Sequence.new(Literal.new('<-'), spacing)
|
300
|
-
question = Sequence.new(Literal.new('?'), spacing)
|
301
|
-
star = Sequence.new(Literal.new('*'), spacing)
|
302
|
-
plus = Sequence.new(Literal.new('+'), spacing)
|
303
|
-
open = Sequence.new(Literal.new('('), spacing)
|
304
|
-
close = Sequence.new(Literal.new(')'), spacing)
|
305
|
-
dot = Sequence.new(Literal.new('.'), spacing).name('dot')
|
306
|
-
|
307
|
-
# HACK these three rules are simplified
|
308
|
-
literal = Sequence.new(
|
309
|
-
Or.new(Regex.new("'.*?'"), Regex.new('".*?"')),
|
310
|
-
spacing
|
311
|
-
).name('literal')
|
312
|
-
class_ = Sequence.new(Regex.new('\[.*?\]'), spacing).name('class')
|
313
|
-
identifier = Sequence.new(
|
314
|
-
Regex.new('[A-Za-z0-9_]+').name('identifier__regex'),
|
315
|
-
spacing
|
316
|
-
).name('identifier')
|
317
|
-
|
318
|
-
primary = Or.new(
|
319
|
-
Sequence.new(
|
320
|
-
identifier,
|
321
|
-
Not.new(left_arrow)
|
322
|
-
).name('primary__sequence'),
|
323
|
-
Sequence.new(
|
324
|
-
open,
|
325
|
-
'EXPRESSION', # placeholder for future substitution
|
326
|
-
close
|
327
|
-
).name('primary__parens'),
|
328
|
-
literal,
|
329
|
-
class_,
|
330
|
-
dot,
|
331
|
-
).name('primary')
|
332
|
-
suffix = Sequence.new(
|
333
|
-
primary,
|
334
|
-
Optional.new(
|
335
|
-
Or.new(question, star, plus)
|
336
|
-
).name('suffix__optional'),
|
337
|
-
).name('suffix')
|
338
|
-
prefix = Sequence.new(
|
339
|
-
Optional.new(
|
340
|
-
Or.new(and_, not_)
|
341
|
-
).name('prefix__optional'),
|
342
|
-
suffix
|
343
|
-
).name('prefix')
|
344
|
-
sequence = ZeroOrMore.new(prefix).name('sequence')
|
345
|
-
expression = Sequence.new(
|
346
|
-
sequence,
|
347
|
-
ZeroOrMore.new(
|
348
|
-
Sequence.new(
|
349
|
-
slash,
|
350
|
-
sequence
|
351
|
-
).name('expression__sequence')
|
352
|
-
).name('expression__zeroormore')
|
353
|
-
).name('expression')
|
354
|
-
if primary.children[1].children[1] != 'EXPRESSION'
|
355
|
-
raise 'Invalid PEG grammar'
|
356
|
-
else
|
357
|
-
primary.children[1].children[1] = expression
|
358
|
-
end
|
359
|
-
definition = Sequence.new(
|
360
|
-
identifier,
|
361
|
-
left_arrow,
|
362
|
-
expression
|
363
|
-
).name('definition')
|
364
|
-
# In the original PEG paper `grammar` is specified as:
|
365
|
-
# grammar <- spacing definition+ end_of_file
|
366
|
-
# but we skip `end_of_file` allowing the grammar to
|
367
|
-
# match just a part of source in order to know where
|
368
|
-
# the syntax error occurred.
|
369
|
-
grammar = Sequence.new(
|
370
|
-
spacing,
|
371
|
-
OneOrMore.new(definition).name('grammar__oneormore')
|
372
|
-
).name('grammar')
|
373
|
-
|
374
|
-
grammar
|
375
|
-
end
|
376
|
-
end
|
377
|
-
|
378
|
-
class ReferenceResolver
|
379
|
-
def initialize(rules)
|
380
|
-
rules = rules.map {|rule| [rule.name, rule]}
|
381
|
-
@rules = Hash[rules]
|
382
|
-
end
|
383
|
-
|
384
|
-
def resolve
|
385
|
-
name, rule = @rules.first
|
386
|
-
_resolve(rule)
|
387
|
-
end
|
388
|
-
|
389
|
-
def _resolve(rule)
|
390
|
-
if rule.class == Reference
|
391
|
-
rule = @rules[rule.reference]
|
392
|
-
_resolve(rule)
|
393
|
-
else
|
394
|
-
old_children = rule.children
|
395
|
-
rule.children = [] # avoid infinite recursion of _resolve
|
396
|
-
new_children = old_children.map {|child| _resolve(child)}
|
397
|
-
rule.children = new_children
|
398
|
-
rule
|
399
|
-
end
|
400
|
-
end
|
401
|
-
end
|
402
|
-
|
403
|
-
class Language
|
404
|
-
@@default = proc {|node, children| children}
|
405
|
-
# we rely on the fact that 1.9+ Hash maintains order
|
406
|
-
@@rules = {}
|
407
|
-
@@blocks = {}
|
408
|
-
|
409
|
-
def self.rule(rule, &block)
|
410
|
-
name = rule.split('<-')[0].strip
|
411
|
-
@@rules[name] = rule
|
412
|
-
@@blocks[name] = block
|
413
|
-
end
|
414
|
-
|
415
|
-
def self.default(&block)
|
416
|
-
@@default = block
|
417
|
-
end
|
418
|
-
|
419
|
-
def to_lambda(&block)
|
420
|
-
obj = Object.new
|
421
|
-
obj.define_singleton_method(:_, &block)
|
422
|
-
return obj.method(:_).to_proc
|
423
|
-
end
|
424
|
-
|
425
|
-
def eval(source)
|
426
|
-
if source.class == String
|
427
|
-
grammar_source = @@rules.values.join("\n")
|
428
|
-
source = Grammar.new(grammar_source).parse(source)
|
429
|
-
end
|
430
|
-
_eval(source)
|
431
|
-
end
|
432
|
-
|
433
|
-
def _eval(node)
|
434
|
-
block = @@blocks[node.name] || @@default
|
435
|
-
if block.arity == 2
|
436
|
-
children = node.children.map {|child| _eval(child)}
|
437
|
-
instance_exec(node, children, &block)
|
438
|
-
elsif block.arity == 1
|
439
|
-
instance_exec(node, &block)
|
440
|
-
else
|
441
|
-
raise "`rule` expects a block with signature |node| or |node, children|"
|
442
|
-
end
|
443
|
-
end
|
444
|
-
end
|
445
|
-
end
|
metadata
CHANGED
@@ -1,24 +1,25 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: peg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
|
-
-
|
8
|
+
- Pete Otaqui
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-05-04 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
|
-
description:
|
15
|
-
|
14
|
+
description: Peg generates various project types for you, smoothing the process of
|
15
|
+
setting up directories, tests and package distribution.
|
16
|
+
email: pete@otaqui.com
|
16
17
|
executables: []
|
17
18
|
extensions: []
|
18
19
|
extra_rdoc_files: []
|
19
20
|
files:
|
20
21
|
- lib/peg.rb
|
21
|
-
homepage:
|
22
|
+
homepage: https://github.com/pete-otaqui/peg
|
22
23
|
licenses: []
|
23
24
|
post_install_message:
|
24
25
|
rdoc_options: []
|
@@ -38,8 +39,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
38
39
|
version: '0'
|
39
40
|
requirements: []
|
40
41
|
rubyforge_project:
|
41
|
-
rubygems_version: 1.8.
|
42
|
+
rubygems_version: 1.8.15
|
42
43
|
signing_key:
|
43
44
|
specification_version: 3
|
44
|
-
summary:
|
45
|
+
summary: Minimal project generator
|
45
46
|
test_files: []
|