peg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/peg.rb +436 -0
  2. metadata +45 -0
@@ -0,0 +1,436 @@
1
# A small Parsing Expression Grammar (PEG) toolkit.
#
# Rules (Literal, Regex, Sequence, Or, ...) match a prefix of a string and
# produce a tree of Node objects. Grammar builds such rules from PEG source
# text, and Language attaches evaluation blocks to named rules.
module PEG
  # Base class giving value semantics: two objects are equal when their
  # `inspect` representations are identical.
  class ValueObject
    def ==(other)
      inspect == other.inspect
    end
  end

  # A node of the parse tree: the matched text, the child nodes and the
  # name of the rule that produced it (nil for anonymous rules).
  class Node < ValueObject
    attr_accessor :text, :children, :name

    def initialize(text, children = [], name = nil)
      @text = text
      @children = children
      @name = name
    end

    def inspect
      "#{self.class}.new(#{text.inspect}, #{children.inspect}, #{name.inspect})"
    end
  end

  # Common behaviour of all parsing rules. Subclasses provide `match(text)`,
  # returning a Node for the matched prefix of `text` or nil, and `_inspect`.
  class Rule < ValueObject
    attr_accessor :children

    def initialize(*children)
      @children = children
    end

    # With an argument: sets the rule name and returns self (chainable).
    # Without an argument: returns the current name.
    def name(value = nil)
      return @name unless value

      @name = value
      self
    end

    # Matches the whole of `source` and returns the resulting Node.
    #
    # Raises SyntaxError when the rule does not match at all or leaves input
    # unconsumed; the message is the `inspect` of up to 50 characters of
    # the input starting where matching stopped.
    def parse(source)
      node = match(source)
      # A failed match consumed nothing; report the error at offset 0
      # instead of calling `text` on nil.
      consumed = node ? node.text.length : 0
      if node.nil? || consumed != source.length
        raise SyntaxError, source[consumed, 50].inspect
      end

      node
    end

    # Builds the Node for a successful match, tagged with this rule's name.
    def result(text, children = [])
      Node.new(text, children, @name)
    end

    def inspect
      repr = "#{self.class}.new(#{_inspect})"
      @name ? repr + ".name(#{@name.inspect})" : repr
    end
  end

  # Matches an exact string at the start of the input.
  class Literal < Rule
    def initialize(literal)
      @literal = literal
      @children = []
    end

    def match(text)
      text.start_with?(@literal) ? result(@literal) : nil
    end

    def _inspect
      @literal.inspect
    end
  end

  # Matches a regular expression (given as pattern source) at the start of
  # the input.
  class Regex < Literal
    def match(text)
      found = anchored_pattern.match(text)
      found && result(found.to_s)
    end

    private

    # The pattern is wrapped in a non-capturing group so that `\A` anchors
    # every alternative of `a|b`, not only the first one. Compiled once.
    def anchored_pattern
      @anchored_pattern ||= Regexp.new("\\A(?:#{@literal})")
    end
  end

  # Matches all children one after another; fails if any child fails.
  class Sequence < Rule
    def match(text)
      rest = text
      consumed = 0
      nodes = []
      @children.each do |child|
        node = child.match(rest)
        # Plain truthiness test: `node == nil` would go through
        # ValueObject#== and inspect the whole subtree.
        return nil unless node

        nodes << node
        consumed += node.text.length
        rest = rest.slice(node.text.length..rest.length)
      end
      result(text.slice(0...consumed), nodes)
    end

    def _inspect
      @children.map(&:inspect).join(', ')
    end
  end

  # Ordered choice: the first child that matches wins.
  class Or < Sequence
    def match(text)
      @children.each do |child|
        node = child.match(text)
        return result(node.text, [node]) if node
      end
      nil
    end
  end

  # Negative lookahead: succeeds (consuming nothing) when the child fails.
  class Not < Sequence
    def match(text)
      @children[0].match(text) ? nil : result('')
    end
  end

  # Positive lookahead: succeeds (consuming nothing) when the child matches.
  class And < Sequence
    def match(text)
      @children[0].match(text) ? result('') : nil
    end
  end

  # Repetition of the first child. The class-level `range` holds the allowed
  # number of repetitions; subclasses override it.
  class OneOrMore < Sequence
    @range = (1..Float::INFINITY)

    class << self
      attr_accessor :range
    end

    def match(text)
      allowed = self.class.range
      # Stop once the upper bound is reached so that a bounded repetition
      # (Optional) consumes at most that many matches instead of failing
      # when more are available.
      limit = allowed.end
      rest = text
      consumed = 0
      nodes = []
      while nodes.length < limit
        node = @children[0].match(rest)
        break unless node

        nodes << node
        # An empty match would repeat forever; keep it and stop.
        break if node.text == ''

        consumed += node.text.length
        rest = rest.slice(node.text.length..rest.length)
      end
      allowed.include?(nodes.length) ? result(text.slice(0...consumed), nodes) : nil
    end
  end

  # Zero or more repetitions of the child.
  class ZeroOrMore < OneOrMore
    @range = (0..Float::INFINITY)
  end

  # Zero or one occurrence of the child.
  class Optional < OneOrMore
    @range = (0..1)
  end

  # Placeholder for a rule referred to by name; replaced by the actual rule
  # in ReferenceResolver. It defines no `match` of its own.
  class Reference < Rule
    attr_reader :reference

    def initialize(name)
      @reference = name
      @children = []
    end

    def _inspect
      @reference.inspect
    end
  end

  # Bottom-up tree visitor: for each named node, calls the class method with
  # the node's name, passing the node and its already-visited children.
  # Unnamed nodes are returned unchanged (their children are not visited).
  class Visitor
    def self.visit(node)
      return node if node.name.nil?

      send(node.name, node, node.children.map { |child| visit(child) })
    end
  end

  # Turns the parse tree of a PEG grammar source (as produced by
  # Grammar#peg_grammar) into a list of Rule objects. Each class method
  # handles the node of the same name.
  class GrammarGenerator < Visitor
    PREFIX_RULES = { '&' => And, '!' => Not }.freeze
    SUFFIX_RULES = { '?' => Optional, '*' => ZeroOrMore, '+' => OneOrMore }.freeze

    def self.identifier__regex(node, children)
      node.text
    end

    def self.identifier(node, children)
      identifier_regex, _spacing = children
      Reference.new(identifier_regex)
    end

    # NOTE(security): the quoted literal is evaluated as Ruby code to unquote
    # it. Do not build grammars from untrusted text.
    def self.literal(node, children)
      Literal.new(Kernel.eval(node.text))
    end

    def self.dot(node, children)
      Regex.new('.')
    end

    # Handler for nodes named 'class' (character classes). It deliberately
    # shadows `Object#class` on this class object, because Visitor
    # dispatches on the node name.
    def self.class(node, children)
      class_, _spacing = children
      Regex.new(class_.text)
    end

    def self.definition(node, children)
      identifier, _left_arrow, expression = children
      expression.name(identifier.reference)
    end

    def self.expression(node, children)
      sequence, rest = children
      rest.empty? ? sequence : Or.new(sequence, *rest)
    end

    def self.expression__zeroormore(node, children)
      children
    end

    def self.expression__sequence(node, children)
      _slash, sequence = children
      sequence
    end

    def self.grammar(node, children)
      _spacing, definitions = children
      definitions
    end

    def self.grammar__oneormore(node, children)
      children
    end

    def self.primary(node, children)
      children[0]
    end

    def self.primary__sequence(node, children)
      identifier, _not_left_arrow = children
      identifier
    end

    def self.primary__parens(node, children)
      _open, expression, _close = children
      expression
    end

    def self.prefix__optional(node, children)
      node.text.strip # HACK
    end

    def self.prefix(node, children)
      prefix, suffix = children
      prefix == '' ? suffix : PREFIX_RULES.fetch(prefix).new(suffix)
    end

    def self.sequence(node, children)
      children.length == 1 ? children[0] : Sequence.new(*children)
    end

    def self.suffix__optional(node, children)
      node.text.strip # HACK
    end

    def self.suffix(node, children)
      primary, optional_suffix = children
      optional_suffix == '' ? primary : SUFFIX_RULES.fetch(optional_suffix).new(primary)
    end
  end

  # A rule built from PEG source text. The first definition in the source is
  # the start rule.
  class Grammar < Sequence
    # Raises SyntaxError (from Rule#parse) when `source` is not a valid
    # grammar.
    def initialize(source)
      @_nodes = peg_grammar.parse(source)
      @children = [ReferenceResolver.new(grammar).resolve]
    end

    def match(source)
      @children[0].match(source)
    end

    # The list of rules generated from the parsed grammar source.
    def grammar
      GrammarGenerator.visit(@_nodes)
    end

    # Hand-built rule tree that parses PEG grammar source itself.
    def peg_grammar
      end_of_line = Or.new(
        Literal.new("\r\n"),
        Literal.new("\n"),
        Literal.new("\r")
      )
      space = Or.new(Literal.new(' '), Literal.new("\t"), end_of_line)
      comment = Sequence.new(
        Literal.new('#'),
        ZeroOrMore.new(
          Sequence.new(Not.new(end_of_line), Regex.new('.'))
        ),
        end_of_line
      )
      spacing = ZeroOrMore.new(Or.new(space, comment))

      and_ = Sequence.new(Literal.new('&'), spacing)
      not_ = Sequence.new(Literal.new('!'), spacing)
      slash = Sequence.new(Literal.new('/'), spacing)
      left_arrow = Sequence.new(Literal.new('<-'), spacing)
      question = Sequence.new(Literal.new('?'), spacing)
      star = Sequence.new(Literal.new('*'), spacing)
      plus = Sequence.new(Literal.new('+'), spacing)
      open = Sequence.new(Literal.new('('), spacing)
      close = Sequence.new(Literal.new(')'), spacing)
      dot = Sequence.new(Literal.new('.'), spacing).name('dot')

      # HACK these three rules are simplified
      literal = Sequence.new(
        Or.new(Regex.new("'.*?'"), Regex.new('".*?"')),
        spacing
      ).name('literal')
      class_ = Sequence.new(Regex.new('\[.*?\]'), spacing).name('class')
      identifier = Sequence.new(
        Regex.new('[A-Za-z0-9_]+').name('identifier__regex'),
        spacing
      ).name('identifier')

      primary = Or.new(
        Sequence.new(
          identifier,
          Not.new(left_arrow)
        ).name('primary__sequence'),
        Sequence.new(
          open,
          'EXPRESSION', # placeholder for future substitution
          close
        ).name('primary__parens'),
        literal,
        class_,
        dot
      ).name('primary')
      suffix = Sequence.new(
        primary,
        Optional.new(
          Or.new(question, star, plus)
        ).name('suffix__optional')
      ).name('suffix')
      prefix = Sequence.new(
        Optional.new(
          Or.new(and_, not_)
        ).name('prefix__optional'),
        suffix
      ).name('prefix')
      sequence = ZeroOrMore.new(prefix).name('sequence')
      expression = Sequence.new(
        sequence,
        ZeroOrMore.new(
          Sequence.new(
            slash,
            sequence
          ).name('expression__sequence')
        ).name('expression__zeroormore')
      ).name('expression')
      # `expression` and `primary` refer to each other, so the placeholder
      # put into `primary` above is swapped for the real rule here.
      if primary.children[1].children[1] != 'EXPRESSION'
        raise 'Invalid PEG grammar'
      else
        primary.children[1].children[1] = expression
      end
      definition = Sequence.new(
        identifier,
        left_arrow,
        expression
      ).name('definition')
      # In the original PEG paper `grammar` is specified as:
      #     grammar <- spacing definition+ end_of_file
      # but we skip `end_of_file` allowing the grammar to
      # match just a part of source in order to know where
      # the syntax error occurred.
      Sequence.new(
        spacing,
        OneOrMore.new(definition).name('grammar__oneormore')
      ).name('grammar')
    end
  end

  # Replaces Reference placeholders in a list of named rules with the rules
  # they refer to, starting from the first rule.
  class ReferenceResolver
    def initialize(rules)
      @rules = rules
    end

    # Returns the first rule with every reachable reference resolved.
    def resolve
      @resolved = {}.compare_by_identity
      _resolve!(@rules[0])
    end

    # Resolves `rule` in place and returns the rule that should stand in its
    # position. Referenced rules are resolved recursively as well; rules that
    # were already visited are returned as they are, which keeps recursive
    # grammars from looping forever.
    def _resolve!(rule)
      target = dereference(rule)
      @resolved ||= {}.compare_by_identity
      return target if @resolved.key?(target)

      @resolved[target] = true
      target.children.map! { |child| _resolve!(child) }
      target
    end

    # Finds a rule by name; raises RuntimeError when there is none.
    def reference(name)
      @rules.find { |r| r.name == name } || raise("rule `#{name}` not found")
    end

    private

    # Follows a chain of references (`a <- b`, `b <- c`, ...) to the first
    # rule that is not a Reference. Raises RuntimeError on a cycle made only
    # of references, which could never be matched.
    def dereference(rule)
      seen = []
      while rule.class == Reference
        if seen.include?(rule.reference)
          raise "circular reference to rule `#{rule.reference}`"
        end

        seen << rule.reference
        rule = reference(rule.reference)
      end
      rule
    end
  end

  # Base class for languages defined as PEG rules with evaluation blocks:
  #
  #     class Digits < PEG::Language
  #       rule('number <- [0-9]+') { |node, children| node.text.to_i }
  #     end
  #     Digits.new.eval('42') # => 42
  #
  # Rules, blocks and the default block are stored per class and inherited
  # from superclasses, so sibling languages do not affect each other.
  class Language
    # Used for nodes without a block of their own: returns the children.
    DEFAULT_BLOCK = proc { |_node, children| children }

    # Registers a rule given as PEG source (`name <- expression`) with an
    # optional block, which is called with the node and its evaluated
    # children. The first registered rule is the start rule.
    def self.rule(rule, &block)
      name = rule.split('<-')[0].strip
      # we rely on the fact that 1.9+ Hash maintains order
      (@rules ||= {})[name] = rule
      (@blocks ||= {})[name] = block
    end

    # Sets the block used for nodes that have no block of their own.
    def self.default(&block)
      @default = block
    end

    # Rule sources by name: inherited ones first, then this class's own.
    def self.rules
      inherited = superclass.respond_to?(:rules) ? superclass.rules : {}
      inherited.merge(@rules || {})
    end

    # Evaluation blocks by rule name, merged the same way as `rules`.
    def self.blocks
      inherited = superclass.respond_to?(:blocks) ? superclass.blocks : {}
      inherited.merge(@blocks || {})
    end

    # The block for nodes without their own: this class's, else the nearest
    # superclass's, else DEFAULT_BLOCK.
    def self.default_block
      return @default if @default

      superclass.respond_to?(:default_block) ? superclass.default_block : DEFAULT_BLOCK
    end

    # Parses `source` with the registered rules and evaluates the resulting
    # tree. Raises SyntaxError when `source` does not match the grammar.
    def eval(source)
      grammar_source = self.class.rules.values.join("\n")
      node = Grammar.new(grammar_source).parse(source)
      _eval(node)
    end

    # Evaluates children first, then runs the node's block (or the default
    # block when the rule has none) in the context of this instance.
    def _eval(node)
      block = self.class.blocks[node.name] || self.class.default_block
      children = node.children.map { |child| _eval(child) }
      instance_exec(node, children, &block)
    end
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: peg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Vladimir Keleshev
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-06-02 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Parsing Expression Grammar implementation
15
+ email: vladimir@keleshev.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/peg.rb
21
+ homepage: http://github.com/halst/peg.rb
22
+ licenses: []
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 1.8.23
42
+ signing_key:
43
+ specification_version: 3
44
+ summary: PEG implementation
45
+ test_files: []