peg 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/peg.rb +436 -0
  2. metadata +45 -0
@@ -0,0 +1,436 @@
1
+ module PEG
2
# Base class giving subclasses value semantics: two objects are equal
# when their `inspect` strings are equal. Subclasses are expected to
# define `inspect` so that it spells out their complete state.
class ValueObject
  # Compares the textual representation of both operands.
  def ==(other)
    mine = inspect
    theirs = other.inspect
    mine == theirs
  end
end
7
+
8
# A node of the parse tree: the matched text, the child nodes and the
# name of the rule that produced it (nil for anonymous rules).
class Node < ValueObject
  attr_accessor :text, :children, :name

  def initialize(text, children=[], name=nil)
    @text = text
    @children = children
    @name = name
  end

  # Renders the node as the constructor call that would rebuild it.
  def inspect
    arguments = [text, children, name].map { |part| part.inspect }
    "#{self.class}.new(#{arguments.join(', ')})"
  end
end
19
+
20
# Base class of all grammar rules. Subclasses provide `match(text)`,
# returning a Node for the matched prefix of `text` or nil, and
# `_inspect`, returning the constructor arguments as source text.
class Rule < ValueObject
  attr_accessor :children

  def initialize(*children)
    @children = children
  end

  # Combined reader/writer: with an argument stores the rule name and
  # returns self (so calls can be chained); without one returns the name.
  def name(value=nil)
    return @name unless value
    @name = value
    self
  end

  # Matches `source` and requires the whole input to be consumed.
  #
  # Returns the resulting Node. Raises SyntaxError whose message is the
  # inspected first 50 characters of the unmatched remainder. A rule that
  # does not match at all is reported the same way (remainder starts at
  # offset 0) instead of failing with NoMethodError on nil.
  def parse(source)
    node = match(source)
    consumed = node ? node.text.length : 0
    if node.nil? || consumed != source.length
      raise SyntaxError, source[consumed, 50].inspect
    end
    node
  end

  # Builds the Node for a successful match, tagged with this rule's name.
  def result(text, children=[])
    Node.new(text, children, @name)
  end

  # Renders the rule as the Ruby expression that would rebuild it.
  def inspect
    repr = "#{self.class}.new(#{_inspect})"
    @name ? repr + ".name(#{@name.inspect})" : repr
  end
end
54
+
55
# Rule that matches a fixed string at the beginning of the input.
class Literal < Rule
  def initialize(literal)
    @children = []
    @literal = literal
  end

  # Returns a Node holding the literal when `text` begins with it,
  # otherwise nil.
  def match(text)
    return nil unless text.start_with?(@literal)
    result(@literal)
  end

  # The literal, as it appears inside `inspect`.
  def _inspect
    @literal.inspect
  end
end
69
+
70
# Rule that matches a regular expression (given as pattern source text)
# at the beginning of the input.
class Regex < Literal
  # Returns a Node holding the matched prefix of `text`, or nil.
  #
  # The pattern is wrapped in a non-capturing group before anchoring:
  # with a bare '\A' prefix a pattern such as `a|b` would anchor only its
  # first alternative and could "match" `b` anywhere inside the text.
  # The compiled Regexp is cached because the pattern never changes.
  def match(text)
    @regexp ||= Regexp.new('\A(?:' + @literal + ')')
    found = @regexp.match(text)
    found && result(found.to_s)
  end
end
76
+
77
# Rule that matches all of its children one after another.
class Sequence < Rule
  # Feeds each child the part of `text` left over by the previous ones.
  # Returns a Node spanning everything consumed, or nil as soon as one
  # child fails.
  def match(text)
    remaining = String.new(text)
    consumed = 0
    nodes = []
    @children.each do |rule|
      node = rule.match(remaining)
      return nil if node.nil?
      size = node.text.length
      nodes << node
      remaining = remaining.slice(size..remaining.length)
      consumed += size
    end
    result(text.slice(0...consumed), nodes)
  end

  # The children, comma separated, as they appear inside `inspect`.
  def _inspect
    @children.map { |child| child.inspect }.join(', ')
  end
end
99
+
100
# Ordered choice: tries the alternatives in order and takes the first
# one that matches.
class Or < Sequence
  # Returns a Node wrapping the first successful alternative's node,
  # or nil when no alternative matches.
  def match(text)
    hit = nil
    @children.find { |alternative| hit = alternative.match(text) }
    hit ? result(hit.text, [hit]) : nil
  end
end
109
+
110
# Negative lookahead: succeeds, consuming nothing, only when its first
# child does NOT match.
class Not < Sequence
  def match(text)
    return nil if @children[0].match(text)
    result('')
  end
end
115
+
116
# Positive lookahead: succeeds, consuming nothing, only when its first
# child matches.
class And < Sequence
  def match(text)
    return nil unless @children[0].match(text)
    result('')
  end
end
121
+
122
# Greedy repetition of the first child. The permitted number of
# repetitions is the class-level `range`; subclasses only set a
# different range.
class OneOrMore < Sequence
  @range = (1..Float::INFINITY)

  class << self
    attr_accessor :range
  end

  # Matches the first child repeatedly, never more often than the upper
  # bound of the range, and returns a Node spanning all repetitions.
  # Returns nil when fewer repetitions than the lower bound were found.
  #
  # Stopping at the upper bound is what makes bounded repetition work:
  # without it an optional `a` applied to "aa" would collect two matches
  # and then be rejected for exceeding 0..1, although an optional
  # expression must never fail.
  def match(text)
    range = self.class.range
    limit = range.end || Float::INFINITY
    limit -= 1 if range.exclude_end?
    text_ = String.new(text)
    len = 0
    children = []
    while children.length < limit
      node = @children[0].match(text_)
      break unless node
      children << node
      # An empty match consumes nothing; repeating it would loop forever.
      break if node.text == ''
      text_ = text_.slice(node.text.length..text_.length)
      len += node.text.length
    end
    range.include?(children.length) ? result(text.slice(0...len), children) : nil
  end
end
145
+
146
# Repetition that also accepts no match at all.
class ZeroOrMore < OneOrMore
  self.range = (0..Float::INFINITY)
end
149
+
150
# Repetition whose allowed count is zero or one.
class Optional < OneOrMore
  self.range = (0..1)
end
153
+
154
# Placeholder for a rule mentioned by name inside a grammar. It has no
# `match` of its own; ReferenceResolver swaps it for the named rule.
class Reference < Rule
  attr_reader :reference

  def initialize(name)
    @children = []
    @reference = name
  end

  # The referenced rule name, as it appears inside `inspect`.
  def _inspect
    reference.inspect
  end
end
166
+
167
# Bottom-up tree walker. Subclasses define one class method per node
# name, taking the node and its already visited children.
class Visitor
  # Returns unnamed nodes untouched; for a named node visits the
  # children first, then dispatches to the class method named after it.
  def self.visit(node)
    handler = node.name
    return node if handler == nil
    visited = node.children.map { |child| visit(child) }
    send(handler, node, visited)
  end
end
173
+
174
# Turns the parse tree of a PEG grammar source into Rule objects.
# Each class method handles the node of the same name and receives the
# node plus its already converted children (see Visitor.visit).
class GrammarGenerator < Visitor
  # The identifier as plain text.
  def self.identifier__regex(node, children)
    node.text
  end

  # children: [identifier text, spacing]
  def self.identifier(node, children)
    Reference.new(children[0])
  end

  # NOTE(review): the quoted literal is evaluated as Ruby source. A
  # double-quoted literal containing `#{...}` therefore executes code,
  # so grammar sources must come from a trusted origin.
  def self.literal(node, children)
    Literal.new(Kernel.eval(node.text))
  end

  def self.dot(node, children)
    Regex.new('.')
  end

  # children: [character class node, spacing]
  def self.class(node, children)
    Regex.new(children[0].text)
  end

  # children: [identifier, left arrow, expression]; names the expression
  # after the identifier.
  def self.definition(node, children)
    target, _arrow, body = children
    body.name(target.reference)
  end

  # children: [first sequence, list of alternative sequences]
  def self.expression(node, children)
    head, alternatives = children
    return head if alternatives.length == 0
    Or.new(head, *alternatives)
  end

  def self.expression__zeroormore(node, children)
    children
  end

  # children: [slash, sequence]
  def self.expression__sequence(node, children)
    children[1]
  end

  # children: [spacing, definitions]
  def self.grammar(node, children)
    children[1]
  end

  def self.grammar__oneormore(node, children)
    children
  end

  def self.primary(node, children)
    children[0]
  end

  # children: [identifier, negative lookahead for a left arrow]
  def self.primary__sequence(node, children)
    children[0]
  end

  # children: [open paren, expression, close paren]
  def self.primary__parens(node, children)
    children[1]
  end

  def self.prefix__optional(node, children)
    node.text.strip # HACK
  end

  # children: [prefix operator text ('' when absent), suffix rule]
  def self.prefix(node, children)
    operator, operand = children
    return operand if operator == ''
    {'&' => And, '!' => Not}.fetch(operator).new(operand)
  end

  def self.sequence(node, children)
    return children[0] if children.length == 1
    Sequence.new(*children)
  end

  def self.suffix__optional(node, children)
    node.text.strip # HACK
  end

  # children: [primary rule, suffix operator text ('' when absent)]
  def self.suffix(node, children)
    operand, operator = children
    return operand if operator == ''
    wrappers = {
      '?' => Optional,
      '*' => ZeroOrMore,
      '+' => OneOrMore,
    }
    wrappers.fetch(operator).new(operand)
  end
end
265
+
266
# A rule built from PEG grammar source text. The source is parsed with
# the bootstrap grammar from `peg_grammar`, converted into rules by
# GrammarGenerator, and the first rule (with references resolved)
# becomes the start rule.
class Grammar < Sequence
  def initialize(source)
    @_nodes = peg_grammar.parse(source)
    resolver = ReferenceResolver.new(grammar)
    @children = [resolver.resolve]
  end

  # Delegates to the start rule.
  def match(source)
    @children[0].match(source)
  end

  # The list of rules generated from the parsed grammar source.
  def grammar
    GrammarGenerator.visit(@_nodes)
  end

  # Builds the hand-written grammar that parses PEG grammar sources.
  def peg_grammar
    end_of_line = Or.new(Literal.new("\r\n"), Literal.new("\n"), Literal.new("\r"))
    space = Or.new(Literal.new(" "), Literal.new("\t"), end_of_line)
    comment = Sequence.new(
      Literal.new('#'),
      ZeroOrMore.new(Sequence.new(Not.new(end_of_line), Regex.new('.'))),
      end_of_line
    )
    spacing = ZeroOrMore.new(Or.new(space, comment))

    # Every punctuation token swallows the spacing that follows it.
    token = lambda { |symbol| Sequence.new(Literal.new(symbol), spacing) }
    and_ = token.call('&')
    not_ = token.call('!')
    slash = token.call('/')
    left_arrow = token.call('<-')
    question = token.call('?')
    star = token.call('*')
    plus = token.call('+')
    open = token.call('(')
    close = token.call(')')
    dot = token.call('.').name('dot')

    # HACK these three rules are simplified
    quoted = Or.new(Regex.new("'.*?'"), Regex.new('".*?"'))
    literal = Sequence.new(quoted, spacing).name('literal')
    class_ = Sequence.new(Regex.new('\[.*?\]'), spacing).name('class')
    identifier = Sequence.new(
      Regex.new('[A-Za-z0-9_]+').name('identifier__regex'),
      spacing
    ).name('identifier')

    plain_identifier = Sequence.new(identifier, Not.new(left_arrow))
    # 'EXPRESSION' is a placeholder for future substitution: `expression`
    # depends on `primary` and so cannot exist yet.
    parens = Sequence.new(open, 'EXPRESSION', close)
    primary = Or.new(
      plain_identifier.name('primary__sequence'),
      parens.name('primary__parens'),
      literal,
      class_,
      dot
    ).name('primary')
    suffix = Sequence.new(
      primary,
      Optional.new(Or.new(question, star, plus)).name('suffix__optional')
    ).name('suffix')
    prefix = Sequence.new(
      Optional.new(Or.new(and_, not_)).name('prefix__optional'),
      suffix
    ).name('prefix')
    sequence = ZeroOrMore.new(prefix).name('sequence')
    alternative = Sequence.new(slash, sequence).name('expression__sequence')
    expression = Sequence.new(
      sequence,
      ZeroOrMore.new(alternative).name('expression__zeroormore')
    ).name('expression')

    # Close the recursion: put the real expression where the placeholder is.
    raise 'Invalid PEG grammar' if primary.children[1].children[1] != 'EXPRESSION'
    primary.children[1].children[1] = expression

    definition = Sequence.new(identifier, left_arrow, expression).name('definition')
    # In the original PEG paper `grammar` is specified as:
    #     grammar <- spacing definition+ end_of_file
    # but we skip `end_of_file`, allowing the grammar to
    # match just a part of the source in order to know where
    # the syntax error occurred.
    Sequence.new(
      spacing,
      OneOrMore.new(definition).name('grammar__oneormore')
    ).name('grammar')
  end
end
377
+
378
# Replaces Reference placeholders inside a list of named rules with the
# rules they name, producing a directly matchable rule graph.
class ReferenceResolver
  # rules: the named rules of a grammar; the first one is the start rule.
  def initialize(rules)
    @rules = rules
  end

  # Returns the start rule with every reachable reference resolved.
  def resolve
    _resolve!(@rules[0])
  end

  # Resolves `rule` and, recursively, everything below it, in place.
  #
  # Rules substituted for a reference are descended into as well, so
  # references nested any number of levels deep get resolved. `visited`
  # remembers, by object identity, the rules already processed; this
  # keeps recursive grammars (a rule that mentions itself) from being
  # walked forever. Identity is used because rule equality is based on
  # `inspect`, which does not terminate on a cyclic rule graph.
  def _resolve!(rule, visited={}.compare_by_identity)
    rule = _follow(rule) if rule.class == Reference
    return rule if visited.key?(rule)
    visited[rule] = true
    rule.children.map! {|child| _resolve!(child, visited)}
    rule
  end

  # Looks up a rule by name; raises RuntimeError when there is none.
  def reference(name)
    @rules.find {|r| r.name == name} || raise("rule `#{name}` not found")
  end

  # Follows a chain of aliases (rules that are themselves references)
  # to the first real rule. Raises RuntimeError when the chain loops
  # back on itself, e.g. `a <- b` together with `b <- a`.
  def _follow(ref)
    trail = []
    while ref.class == Reference
      if trail.any? {|seen| seen.equal?(ref)}
        raise "circular reference involving rule `#{ref.reference}`"
      end
      trail << ref
      ref = reference(ref.reference)
    end
    ref
  end
end
407
+
408
# Base class for small languages: subclasses declare grammar rules with
# `rule`, each optionally paired with a block that evaluates the nodes
# produced by that rule.
#
# Rules, blocks and the default block are stored per class and copied
# to a subclass when it is created. (Class variables would be shared by
# every Language subclass in the process, mixing unrelated grammars.)
class Language
  @default = proc {|node, children| children}
  # we rely on the fact that 1.9+ Hash maintains order
  @rules = {}
  @blocks = {}

  class << self
    # rules:  rule name => rule source text, in declaration order
    # blocks: rule name => evaluation block (nil when none was given)
    attr_reader :rules, :blocks

    # Gives every subclass its own copy of the state declared so far.
    def inherited(subclass)
      super
      subclass.instance_variable_set(:@rules, @rules.dup)
      subclass.instance_variable_set(:@blocks, @blocks.dup)
      subclass.instance_variable_set(:@default, @default)
    end

    # Declares a rule such as "number <- [0-9]+". The block, if given,
    # is run with (node, evaluated children) for each node of that rule.
    def rule(rule, &block)
      name = rule.split('<-')[0].strip
      @rules[name] = rule
      @blocks[name] = block
    end

    # With a block: sets the block used for nodes that have no block of
    # their own. Always returns the block currently in effect.
    def default(&block)
      @default = block if block
      @default
    end
  end

  # Parses `source` with the grammar made of all declared rules (the
  # first declared rule is the start rule) and evaluates the parse tree.
  def eval(source)
    grammar_source = self.class.rules.values.join("\n")
    node = Grammar.new(grammar_source).parse(source)
    _eval(node)
  end

  # Evaluates the children first, then runs this node's block in the
  # context of the language instance. Unnamed nodes, nodes of unknown
  # rules and rules declared without a block use the default block.
  def _eval(node)
    language = self.class
    block = language.blocks[node.name] || language.default
    children = node.children.map {|child| _eval(child)}
    instance_exec(node, children, &block)
  end
end
436
+ end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: peg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Vladimir Keleshev
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-06-02 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Parsing Expression Grammar implementation
15
+ email: vladimir@keleshev.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/peg.rb
21
+ homepage: http://github.com/halst/peg.rb
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.23
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: PEG implementation
45
+ test_files: []